From 65005606ec6ccd2d337c8dd150cc030d14b0aca9 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Sat, 18 Dec 2021 18:54:53 -0500 Subject: [PATCH] More regression refactoring (#1708) More work on #1680 --- docs/regressions-backgroundlinking18.md | 56 +++-- docs/regressions-backgroundlinking19.md | 56 +++-- docs/regressions-backgroundlinking20.md | 56 +++-- docs/regressions-car17v1.5.md | 89 +++---- docs/regressions-car17v2.0-doc2query.md | 77 +++--- docs/regressions-car17v2.0.md | 77 +++--- docs/regressions-clef06-fr.md | 22 +- docs/regressions-core17.md | 75 +++--- docs/regressions-core18.md | 75 +++--- docs/regressions-cw09b.md | 187 ++++++++------- docs/regressions-cw12.md | 93 +++---- docs/regressions-cw12b13.md | 133 ++++++----- docs/regressions-disk12.md | 183 +++++++------- docs/regressions-disk45.md | 183 +++++++------- ...ressions-dl19-doc-docTTTTTquery-per-doc.md | 55 +++-- ...ions-dl19-doc-docTTTTTquery-per-passage.md | 55 +++-- docs/regressions-dl19-doc-per-passage.md | 99 ++++---- docs/regressions-dl19-doc.md | 99 ++++---- .../regressions-dl19-passage-docTTTTTquery.md | 77 +++--- docs/regressions-dl19-passage.md | 99 ++++---- ...ressions-dl20-doc-docTTTTTquery-per-doc.md | 57 +++-- ...ions-dl20-doc-docTTTTTquery-per-passage.md | 57 +++-- docs/regressions-dl20-doc-per-passage.md | 101 ++++---- docs/regressions-dl20-doc.md | 79 +++--- .../regressions-dl20-passage-docTTTTTquery.md | 79 +++--- docs/regressions-dl20-passage.md | 101 ++++---- ...-dl21-doc-segmented-unicoil-noexp-0shot.md | 22 +- docs/regressions-dl21-doc-segmented.md | 55 +++-- docs/regressions-dl21-doc.md | 55 +++-- docs/regressions-dl21-passage-augmented.md | 55 +++-- ...ssions-dl21-passage-unicoil-noexp-0shot.md | 22 +- docs/regressions-dl21-passage.md | 55 +++-- docs/regressions-fever.md | 31 +-- docs/regressions-fire12-bn.md | 22 +- docs/regressions-fire12-en.md | 22 +- docs/regressions-fire12-hi.md | 22 +- docs/regressions-gov2.md | 183 +++++++------- docs/regressions-mb11.md | 129 +++++----- docs/regressions-mb13.md | 129 +++++----- docs/regressions-mrtydi-v1.1-ar.md | 38 +-- docs/regressions-mrtydi-v1.1-bn.md | 38 +-- docs/regressions-mrtydi-v1.1-en.md | 38 +-- docs/regressions-mrtydi-v1.1-fi.md | 38 +-- docs/regressions-mrtydi-v1.1-id.md | 38 +-- docs/regressions-mrtydi-v1.1-ja.md | 38 +-- docs/regressions-mrtydi-v1.1-ko.md | 38 +-- docs/regressions-mrtydi-v1.1-ru.md | 38 +-- docs/regressions-mrtydi-v1.1-sw.md | 38 +-- docs/regressions-mrtydi-v1.1-te.md | 38 +-- docs/regressions-mrtydi-v1.1-th.md | 38 +-- ...sions-msmarco-doc-docTTTTTquery-per-doc.md | 31 +-- ...smarco-doc-docTTTTTquery-per-passage-v3.md | 31 +-- ...s-msmarco-doc-docTTTTTquery-per-passage.md | 31 +-- .../regressions-msmarco-doc-per-passage-v2.md | 97 ++++---- .../regressions-msmarco-doc-per-passage-v3.md | 97 ++++---- docs/regressions-msmarco-doc-per-passage.md | 97 ++++---- docs/regressions-msmarco-doc.md | 75 +++--- .../regressions-msmarco-passage-deepimpact.md | 20 +- ...ions-msmarco-passage-distill-splade-max.md | 20 +- docs/regressions-msmarco-passage-doc2query.md | 53 ++-- ...gressions-msmarco-passage-docTTTTTquery.md | 75 +++--- ...msmarco-passage-unicoil-tilde-expansion.md | 20 +- docs/regressions-msmarco-passage-unicoil.md | 20 +- docs/regressions-msmarco-passage.md | 97 ++++---- ...co-v2-doc-segmented-unicoil-noexp-0shot.md | 31 +-- docs/regressions-msmarco-v2-doc-segmented.md | 91 +++---- docs/regressions-msmarco-v2-doc.md | 91 +++---- ...egressions-msmarco-v2-passage-augmented.md | 91 +++---- ...-msmarco-v2-passage-unicoil-noexp-0shot.md | 31 +-- docs/regressions-msmarco-v2-passage.md | 91 +++---- docs/regressions-ntcir8-zh.md | 22 +- docs/regressions-robust05.md | 75 +++--- docs/regressions-trec02-ar.md | 22 +- docs/regressions-wt10g.md | 75 +++--- src/main/python/run_regression.py | 26 +- .../regression/backgroundlinking18.yaml | 44 ++-- .../regression/backgroundlinking19.yaml | 44 ++-- .../regression/backgroundlinking20.yaml | 44 ++-- src/main/resources/regression/cacm.yaml | 69 ++---- src/main/resources/regression/car17v1.5.yaml | 73 +++--- .../regression/car17v2.0-doc2query.yaml | 69 ++---- src/main/resources/regression/car17v2.0.yaml | 69 ++---- src/main/resources/regression/clef06-fr.yaml | 35 ++- src/main/resources/regression/core17.yaml | 69 ++---- src/main/resources/regression/core18.yaml | 69 ++---- src/main/resources/regression/cw09b.yaml | 106 ++++---- src/main/resources/regression/cw12.yaml | 76 +++--- src/main/resources/regression/cw12b13.yaml | 105 ++++---- src/main/resources/regression/disk12.yaml | 69 ++---- src/main/resources/regression/disk45.yaml | 69 ++---- .../dl19-doc-docTTTTTquery-per-doc.yaml | 62 ++--- .../dl19-doc-docTTTTTquery-per-passage.yaml | 61 ++--- .../regression/dl19-doc-per-passage.yaml | 105 +++----- src/main/resources/regression/dl19-doc.yaml | 106 +++----- .../dl19-passage-docTTTTTquery.yaml | 77 +++--- .../resources/regression/dl19-passage.yaml | 98 +++----- .../dl20-doc-docTTTTTquery-per-doc.yaml | 74 +++--- .../dl20-doc-docTTTTTquery-per-passage.yaml | 73 +++--- .../regression/dl20-doc-per-passage.yaml | 125 ++++------ src/main/resources/regression/dl20-doc.yaml | 99 +++----- .../dl20-passage-docTTTTTquery.yaml | 97 +++----- .../resources/regression/dl20-passage.yaml | 122 ++++------ ...l21-doc-segmented-unicoil-noexp-0shot.yaml | 37 ++- .../regression/dl21-doc-segmented.yaml | 61 ++--- src/main/resources/regression/dl21-doc.yaml | 61 ++--- .../regression/dl21-passage-augmented.yaml | 59 ++--- .../dl21-passage-unicoil-noexp-0shot.yaml | 36 ++- .../resources/regression/dl21-passage.yaml | 57 ++--- src/main/resources/regression/fever.yaml | 27 +-- src/main/resources/regression/fire12-bn.yaml | 37 ++- src/main/resources/regression/fire12-en.yaml | 36 ++- src/main/resources/regression/fire12-hi.yaml | 36 ++- src/main/resources/regression/gov2.yaml | 74 +++--- src/main/resources/regression/mb11.yaml | 80 +++---- src/main/resources/regression/mb13.yaml | 80 +++---- .../resources/regression/mrtydi-v1.1-ar.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-bn.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-en.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-fi.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-id.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-ja.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-ko.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-ru.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-sw.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-te.yaml | 28 +-- .../resources/regression/mrtydi-v1.1-th.yaml | 28 +-- .../msmarco-doc-docTTTTTquery-per-doc.yaml | 36 ++- ...arco-doc-docTTTTTquery-per-passage-v3.yaml | 37 ++- ...msmarco-doc-docTTTTTquery-per-passage.yaml | 37 ++- .../msmarco-doc-per-passage-v2.yaml | 89 ++----- .../msmarco-doc-per-passage-v3.yaml | 89 ++----- .../regression/msmarco-doc-per-passage.yaml | 89 ++----- .../resources/regression/msmarco-doc.yaml | 65 ++--- .../msmarco-passage-deepimpact.yaml | 29 +-- .../msmarco-passage-distill-splade-max.yaml | 29 +-- .../regression/msmarco-passage-doc2query.yaml | 46 ++-- .../msmarco-passage-docTTTTTquery.yaml | 61 ++--- ...marco-passage-unicoil-tilde-expansion.yaml | 37 ++- .../regression/msmarco-passage-unicoil.yaml | 29 +-- .../resources/regression/msmarco-passage.yaml | 78 ++---- ...-v2-doc-segmented-unicoil-noexp-0shot.yaml | 33 +-- .../regression/msmarco-v2-doc-segmented.yaml | 57 ++--- .../resources/regression/msmarco-v2-doc.yaml | 53 ++-- .../msmarco-v2-passage-augmented.yaml | 53 ++-- ...smarco-v2-passage-unicoil-noexp-0shot.yaml | 32 +-- .../regression/msmarco-v2-passage.yaml | 53 ++-- src/main/resources/regression/ntcir8-zh.yaml | 36 ++- src/main/resources/regression/robust05.yaml | 71 +++--- src/main/resources/regression/trec02-ar.yaml | 36 ++- src/main/resources/regression/wt10g.yaml | 73 +++--- src/test/java/io/anserini/doc/DataModel.java | 226 +++++++----------- .../doc/GenerateRegressionDocsTest.java | 4 +- .../doc/JDIQ2018EffectivenessDocsTest.java | 5 +- 153 files changed, 4436 insertions(+), 5095 deletions(-) diff --git a/docs/regressions-backgroundlinking18.md b/docs/regressions-backgroundlinking18.md index dcda034da9..67c1aa8001 100644 --- a/docs/regressions-backgroundlinking18.md +++ b/docs/regressions-backgroundlinking18.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection WashingtonPostCollection \ - -input /path/to/wapo.v2 \ - -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -generator WashingtonPostGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection WashingtonPostCollection \ + -input /path/to/wapo.v2 \ + -index indexes/lucene-index.wapo.v2 \ + -generator WashingtonPostGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.wapo.v2 & ``` @@ -32,42 +33,45 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt \ - -output runs/run.wapo.v2.bm25.topics.backgroundlinking18.txt \ - -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt \ - -output runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking18.txt \ - -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt \ - -output runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking18.txt \ - -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v2.bm25.topics.backgroundlinking18.txt \ + -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking18.txt \ + -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking18.txt \ + -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & ``` Evaluation can be performed using `trec_eval`: ``` -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking18.txt runs/run.wapo.v2.bm25.topics.backgroundlinking18.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking18.txt runs/run.wapo.v2.bm25.topics.backgroundlinking18.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking18.txt runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking18.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking18.txt runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking18.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking18.txt runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking18.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking18.txt runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking18.txt ``` ## Effectiveness With the above commands, you should be able to reproduce the following results: -NCDG@5 | BM25 | +RM3 | +RM3+DF | +MAP | BM25 | +RM3 | +RM3+DF | :---------------------------------------|-----------|-----------|-----------| -[TREC 2018 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt)| 0.3293 | 0.3526 | 0.4171 | +[TREC 2018 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt)| 0.2490 | 0.2642 | 0.2692 | -AP | BM25 | +RM3 | +RM3+DF | +nDCG@5 | BM25 | +RM3 | +RM3+DF | :---------------------------------------|-----------|-----------|-----------| -[TREC 2018 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt)| 0.2490 | 0.2642 | 0.2692 | +[TREC 2018 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt)| 0.3293 | 0.3526 | 0.4171 | diff --git a/docs/regressions-backgroundlinking19.md b/docs/regressions-backgroundlinking19.md index 675106270b..dbeec9cd53 100644 --- a/docs/regressions-backgroundlinking19.md +++ b/docs/regressions-backgroundlinking19.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection WashingtonPostCollection \ - -input /path/to/wapo.v2 \ - -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -generator WashingtonPostGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection WashingtonPostCollection \ + -input /path/to/wapo.v2 \ + -index indexes/lucene-index.wapo.v2 \ + -generator WashingtonPostGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.wapo.v2 & ``` @@ -32,42 +33,45 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt \ - -output runs/run.wapo.v2.bm25.topics.backgroundlinking19.txt \ - -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt \ - -output runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking19.txt \ - -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt \ - -output runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking19.txt \ - -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v2.bm25.topics.backgroundlinking19.txt \ + -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking19.txt \ + -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking19.txt \ + -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & ``` Evaluation can be performed using `trec_eval`: ``` -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking19.txt runs/run.wapo.v2.bm25.topics.backgroundlinking19.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking19.txt runs/run.wapo.v2.bm25.topics.backgroundlinking19.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking19.txt runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking19.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking19.txt runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking19.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking19.txt runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking19.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking19.txt runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking19.txt ``` ## Effectiveness With the above commands, you should be able to reproduce the following results: -NCDG@5 | BM25 | +RM3 | +RM3+DF | +MAP | BM25 | +RM3 | +RM3+DF | :---------------------------------------|-----------|-----------|-----------| -[TREC 2019 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt)| 0.4785 | 0.5217 | 0.5051 | +[TREC 2019 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt)| 0.3029 | 0.3786 | 0.3154 | -AP | BM25 | +RM3 | +RM3+DF | +nDCG@5 | BM25 | +RM3 | +RM3+DF | :---------------------------------------|-----------|-----------|-----------| -[TREC 2019 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt)| 0.3029 | 0.3786 | 0.3154 | +[TREC 2019 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt)| 0.4785 | 0.5217 | 0.5051 | diff --git a/docs/regressions-backgroundlinking20.md b/docs/regressions-backgroundlinking20.md index 7b37254a97..9905891d01 100644 --- a/docs/regressions-backgroundlinking20.md +++ b/docs/regressions-backgroundlinking20.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection WashingtonPostCollection \ - -input /path/to/wapo.v3 \ - -index indexes/lucene-index.wapo.v3.pos+docvectors+raw \ - -generator WashingtonPostGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection WashingtonPostCollection \ + -input /path/to/wapo.v3 \ + -index indexes/lucene-index.wapo.v3 \ + -generator WashingtonPostGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.wapo.v3 & ``` @@ -32,42 +33,45 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v3.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt \ - -output runs/run.wapo.v3.bm25.topics.backgroundlinking20.txt \ - -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v3.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt \ - -output runs/run.wapo.v3.bm25+rm3.topics.backgroundlinking20.txt \ - -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v3.pos+docvectors+raw \ - -topicreader BackgroundLinking -topics src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt \ - -output runs/run.wapo.v3.bm25+rm3+df.topics.backgroundlinking20.txt \ - -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v3 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v3.bm25.topics.backgroundlinking20.txt \ + -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v3 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v3.bm25+rm3.topics.backgroundlinking20.txt \ + -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v3 \ + -topics src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt -topicreader BackgroundLinking \ + -output runs/run.wapo.v3.bm25+rm3+df.topics.backgroundlinking20.txt \ + -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 & ``` Evaluation can be performed using `trec_eval`: ``` -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking20.txt runs/run.wapo.v3.bm25.topics.backgroundlinking20.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking20.txt runs/run.wapo.v3.bm25.topics.backgroundlinking20.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking20.txt runs/run.wapo.v3.bm25+rm3.topics.backgroundlinking20.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking20.txt runs/run.wapo.v3.bm25+rm3.topics.backgroundlinking20.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m ndcg_cut.5 -c -M1000 -m map src/main/resources/topics-and-qrels/qrels.backgroundlinking20.txt runs/run.wapo.v3.bm25+rm3+df.topics.backgroundlinking20.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M1000 -m map -c -M1000 -m ndcg_cut.5 src/main/resources/topics-and-qrels/qrels.backgroundlinking20.txt runs/run.wapo.v3.bm25+rm3+df.topics.backgroundlinking20.txt ``` ## Effectiveness With the above commands, you should be able to reproduce the following results: -NCDG@5 | BM25 | +RM3 | +RM3+DF | +MAP | BM25 | +RM3 | +RM3+DF | :---------------------------------------|-----------|-----------|-----------| -[TREC 2020 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt)| 0.5231 | 0.5673 | 0.5316 | +[TREC 2020 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt)| 0.3286 | 0.4519 | 0.3438 | -AP | BM25 | +RM3 | +RM3+DF | +nDCG@5 | BM25 | +RM3 | +RM3+DF | :---------------------------------------|-----------|-----------|-----------| -[TREC 2020 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt)| 0.3286 | 0.4519 | 0.3438 | +[TREC 2020 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt)| 0.5231 | 0.5673 | 0.5316 | diff --git a/docs/regressions-car17v1.5.md b/docs/regressions-car17v1.5.md index e5bca0704b..2049086e4d 100644 --- a/docs/regressions-car17v1.5.md +++ b/docs/regressions-car17v1.5.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CarCollection \ - -input /path/to/car-paragraphCorpus.v1.5 \ - -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection CarCollection \ + -input /path/to/car-paragraphCorpus.v1.5 \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.car-paragraphCorpus.v1.5 & ``` @@ -33,51 +34,57 @@ Specifically, this is the section-level passage retrieval task with automatic gr After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v1.5.bm25.topics.car17v1.5.benchmarkY1test.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v1.5.bm25+rm3.topics.car17v1.5.benchmarkY1test.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v1.5.bm25+ax.topics.car17v1.5.benchmarkY1test.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v1.5.ql.topics.car17v1.5.benchmarkY1test.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v1.5.ql+rm3.topics.car17v1.5.benchmarkY1test.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v1.5.ql+ax.topics.car17v1.5.benchmarkY1test.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v1.5.bm25.topics.car17v1.5.benchmarkY1test.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v1.5.bm25+rm3.topics.car17v1.5.benchmarkY1test.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v1.5.bm25+ax.topics.car17v1.5.benchmarkY1test.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v1.5.ql.topics.car17v1.5.benchmarkY1test.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v1.5.ql+rm3.topics.car17v1.5.benchmarkY1test.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v1.5 \ + -topics src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v1.5.ql+ax.topics.car17v1.5.benchmarkY1test.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: ``` -tools/eval/trec_eval.9.0.4/trec_eval -m map -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.bm25.topics.car17v1.5.benchmarkY1test.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map -c -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.bm25.topics.car17v1.5.benchmarkY1test.txt -tools/eval/trec_eval.9.0.4/trec_eval -m map -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.bm25+rm3.topics.car17v1.5.benchmarkY1test.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map -c -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.bm25+rm3.topics.car17v1.5.benchmarkY1test.txt -tools/eval/trec_eval.9.0.4/trec_eval -m map -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.bm25+ax.topics.car17v1.5.benchmarkY1test.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map -c -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.bm25+ax.topics.car17v1.5.benchmarkY1test.txt -tools/eval/trec_eval.9.0.4/trec_eval -m map -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.ql.topics.car17v1.5.benchmarkY1test.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map -c -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.ql.topics.car17v1.5.benchmarkY1test.txt -tools/eval/trec_eval.9.0.4/trec_eval -m map -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.ql+rm3.topics.car17v1.5.benchmarkY1test.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map -c -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.ql+rm3.topics.car17v1.5.benchmarkY1test.txt -tools/eval/trec_eval.9.0.4/trec_eval -m map -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.ql+ax.topics.car17v1.5.benchmarkY1test.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map -c -m recip_rank src/main/resources/topics-and-qrels/qrels.car17v1.5.benchmarkY1test.txt runs/run.car-paragraphCorpus.v1.5.ql+ax.topics.car17v1.5.benchmarkY1test.txt ``` ## Effectiveness @@ -89,6 +96,6 @@ MAP | BM25 | +RM3 | +Ax | QL [TREC 2017 CAR: benchmarkY1test (v1.5)](../src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt/)| 0.1562 | 0.1295 | 0.1358 | 0.1386 | 0.1080 | 0.1048 | -RECIP_RANK | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +MRR | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2017 CAR: benchmarkY1test (v1.5)](../src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt/)| 0.2331 | 0.1923 | 0.1949 | 0.2037 | 0.1599 | 0.1524 | diff --git a/docs/regressions-car17v2.0-doc2query.md b/docs/regressions-car17v2.0-doc2query.md index 47403d7c4e..0dd87a2e3e 100644 --- a/docs/regressions-car17v2.0-doc2query.md +++ b/docs/regressions-car17v2.0-doc2query.md @@ -15,11 +15,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/car-paragraphCorpus.v2.0-doc2query \ - -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 30 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/car-paragraphCorpus.v2.0-doc2query \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -generator DefaultLuceneDocumentGenerator \ + -threads 30 -storePositions -storeDocvectors -storeRaw \ >& logs/log.car-paragraphCorpus.v2.0-doc2query & ``` @@ -39,35 +40,41 @@ Specifically, this is the section-level passage retrieval task with automatic gr After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0-doc2query.bm25.topics.car17v2.0.benchmarkY1test.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0-doc2query.bm25+rm3.topics.car17v2.0.benchmarkY1test.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0-doc2query.bm25+ax.topics.car17v2.0.benchmarkY1test.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0-doc2query.ql.topics.car17v2.0.benchmarkY1test.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0-doc2query.ql+rm3.topics.car17v2.0.benchmarkY1test.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0-doc2query.ql+ax.topics.car17v2.0.benchmarkY1test.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0-doc2query.bm25.topics.car17v2.0.benchmarkY1test.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0-doc2query.bm25+rm3.topics.car17v2.0.benchmarkY1test.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0-doc2query.bm25+ax.topics.car17v2.0.benchmarkY1test.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0-doc2query.ql.topics.car17v2.0.benchmarkY1test.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0-doc2query.ql+rm3.topics.car17v2.0.benchmarkY1test.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0-doc2query.ql+ax.topics.car17v2.0.benchmarkY1test.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: @@ -95,6 +102,6 @@ MAP | BM25 | +RM3 | +Ax | QL [TREC 2017 CAR: benchmarkY1test (v2.0)](../src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt)| 0.1807 | 0.1521 | 0.1470 | 0.1752 | 0.1453 | 0.1339 | -RECIP_RANK | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +MRR | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2017 CAR: benchmarkY1test (v2.0)](../src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt)| 0.2750 | 0.2275 | 0.2186 | 0.2653 | 0.2156 | 0.1981 | diff --git a/docs/regressions-car17v2.0.md b/docs/regressions-car17v2.0.md index 68ac05c2d4..4ece511e3a 100644 --- a/docs/regressions-car17v2.0.md +++ b/docs/regressions-car17v2.0.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CarCollection \ - -input /path/to/car-paragraphCorpus.v2.0 \ - -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection CarCollection \ + -input /path/to/car-paragraphCorpus.v2.0 \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.car-paragraphCorpus.v2.0 & ``` @@ -33,35 +34,41 @@ Specifically, this is the section-level passage retrieval task with automatic gr After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0.bm25.topics.car17v2.0.benchmarkY1test.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0.bm25+rm3.topics.car17v2.0.benchmarkY1test.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0.bm25+ax.topics.car17v2.0.benchmarkY1test.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0.ql.topics.car17v2.0.benchmarkY1test.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0.ql+rm3.topics.car17v2.0.benchmarkY1test.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw \ - -topicreader Car -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt \ - -output runs/run.car-paragraphCorpus.v2.0.ql+ax.topics.car17v2.0.benchmarkY1test.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0.bm25.topics.car17v2.0.benchmarkY1test.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0.bm25+rm3.topics.car17v2.0.benchmarkY1test.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0.bm25+ax.topics.car17v2.0.benchmarkY1test.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0.ql.topics.car17v2.0.benchmarkY1test.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0.ql+rm3.topics.car17v2.0.benchmarkY1test.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.car-paragraphCorpus.v2.0 \ + -topics src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt -topicreader Car \ + -output runs/run.car-paragraphCorpus.v2.0.ql+ax.topics.car17v2.0.benchmarkY1test.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: @@ -89,6 +96,6 @@ MAP | BM25 | +RM3 | +Ax | QL [TREC 2017 CAR: benchmarkY1test (v2.0)](../src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt)| 0.1545 | 0.1286 | 0.1364 | 0.1371 | 0.1080 | 0.1077 | -RECIP_RANK | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +MRR | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2017 CAR: benchmarkY1test (v2.0)](../src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt)| 0.2321 | 0.1927 | 0.1978 | 0.2013 | 0.1598 | 0.1588 | diff --git a/docs/regressions-clef06-fr.md b/docs/regressions-clef06-fr.md index c87eb9f728..988de66e5a 100644 --- a/docs/regressions-clef06-fr.md +++ b/docs/regressions-clef06-fr.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/clef06-fr \ - -index indexes/lucene-index.clef06-fr.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw -language fr \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/clef06-fr \ + -index indexes/lucene-index.clef06-fr \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language fr \ >& logs/log.clef06-fr & ``` @@ -35,10 +36,11 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.clef06-fr.pos+docvectors+raw \ - -topicreader TsvString -topics src/main/resources/topics-and-qrels/topics.clef06fr.mono.fr.txt \ - -output runs/run.clef06-fr.bm25.topics.clef06fr.mono.fr.txt \ - -language fr -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.clef06-fr \ + -topics src/main/resources/topics-and-qrels/topics.clef06fr.mono.fr.txt -topicreader TsvString \ + -output runs/run.clef06-fr.bm25.topics.clef06fr.mono.fr.txt \ + -bm25 -language fr & ``` Evaluation can be performed using `trec_eval`: @@ -61,6 +63,6 @@ P20 | BM25 | [CLEF 2006 (Monolingual French)](../src/main/resources/topics-and-qrels/topics.clef06fr.mono.fr.txt)| 0.3184 | -NDCG20 | BM25 | +nDCG@20 | BM25 | :---------------------------------------|-----------| [CLEF 2006 (Monolingual French)](../src/main/resources/topics-and-qrels/topics.clef06fr.mono.fr.txt)| 0.4458 | diff --git a/docs/regressions-core17.md b/docs/regressions-core17.md index 24fdc25474..364e516bb2 100644 --- a/docs/regressions-core17.md +++ b/docs/regressions-core17.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection NewYorkTimesCollection \ - -input /path/to/nyt \ - -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection NewYorkTimesCollection \ + -input /path/to/nyt \ + -index indexes/lucene-index.nyt \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.nyt & ``` @@ -32,35 +33,41 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core17.txt \ - -output runs/run.nyt.bm25.topics.core17.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core17.txt \ - -output runs/run.nyt.bm25+rm3.topics.core17.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core17.txt \ - -output runs/run.nyt.bm25+ax.topics.core17.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core17.txt \ - -output runs/run.nyt.ql.topics.core17.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core17.txt \ - -output runs/run.nyt.ql+rm3.topics.core17.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.nyt.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core17.txt \ - -output runs/run.nyt.ql+ax.topics.core17.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.nyt \ + -topics src/main/resources/topics-and-qrels/topics.core17.txt -topicreader Trec \ + -output runs/run.nyt.bm25.topics.core17.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.nyt \ + -topics src/main/resources/topics-and-qrels/topics.core17.txt -topicreader Trec \ + -output runs/run.nyt.bm25+rm3.topics.core17.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.nyt \ + -topics src/main/resources/topics-and-qrels/topics.core17.txt -topicreader Trec \ + -output runs/run.nyt.bm25+ax.topics.core17.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.nyt \ + -topics src/main/resources/topics-and-qrels/topics.core17.txt -topicreader Trec \ + -output runs/run.nyt.ql.topics.core17.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.nyt \ + -topics src/main/resources/topics-and-qrels/topics.core17.txt -topicreader Trec \ + -output runs/run.nyt.ql+rm3.topics.core17.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.nyt \ + -topics src/main/resources/topics-and-qrels/topics.core17.txt -topicreader Trec \ + -output runs/run.nyt.ql+ax.topics.core17.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-core18.md b/docs/regressions-core18.md index 060462bf84..01bdf605a5 100644 --- a/docs/regressions-core18.md +++ b/docs/regressions-core18.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection WashingtonPostCollection \ - -input /path/to/wapo.v2 \ - -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -generator WashingtonPostGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection WashingtonPostCollection \ + -input /path/to/wapo.v2 \ + -index indexes/lucene-index.wapo.v2 \ + -generator WashingtonPostGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.wapo.v2 & ``` @@ -32,35 +33,41 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core18.txt \ - -output runs/run.wapo.v2.bm25.topics.core18.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core18.txt \ - -output runs/run.wapo.v2.bm25+rm3.topics.core18.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core18.txt \ - -output runs/run.wapo.v2.bm25+ax.topics.core18.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core18.txt \ - -output runs/run.wapo.v2.ql.topics.core18.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core18.txt \ - -output runs/run.wapo.v2.ql+rm3.topics.core18.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wapo.v2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.core18.txt \ - -output runs/run.wapo.v2.ql+ax.topics.core18.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.core18.txt -topicreader Trec \ + -output runs/run.wapo.v2.bm25.topics.core18.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.core18.txt -topicreader Trec \ + -output runs/run.wapo.v2.bm25+rm3.topics.core18.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.core18.txt -topicreader Trec \ + -output runs/run.wapo.v2.bm25+ax.topics.core18.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.core18.txt -topicreader Trec \ + -output runs/run.wapo.v2.ql.topics.core18.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.core18.txt -topicreader Trec \ + -output runs/run.wapo.v2.ql+rm3.topics.core18.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wapo.v2 \ + -topics src/main/resources/topics-and-qrels/topics.core18.txt -topicreader Trec \ + -output runs/run.wapo.v2.ql+ax.topics.core18.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-cw09b.md b/docs/regressions-cw09b.md index 93b1a91822..070574c23c 100644 --- a/docs/regressions-cw09b.md +++ b/docs/regressions-cw09b.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection ClueWeb09Collection \ - -input /path/to/cw09b \ - -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 44 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection ClueWeb09Collection \ + -input /path/to/cw09b \ + -index indexes/lucene-index.cw09b \ + -generator DefaultLuceneDocumentGenerator \ + -threads 44 -storePositions -storeDocvectors -storeRaw \ >& logs/log.cw09b & ``` @@ -37,83 +38,101 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt \ - -output runs/run.cw09b.bm25.topics.web.51-100.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt \ - -output runs/run.cw09b.bm25.topics.web.101-150.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt \ - -output runs/run.cw09b.bm25.topics.web.151-200.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt \ - -output runs/run.cw09b.bm25+rm3.topics.web.51-100.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt \ - -output runs/run.cw09b.bm25+rm3.topics.web.101-150.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt \ - -output runs/run.cw09b.bm25+rm3.topics.web.151-200.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt \ - -output runs/run.cw09b.bm25+ax.topics.web.51-100.txt \ - -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt \ - -output runs/run.cw09b.bm25+ax.topics.web.101-150.txt \ - -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt \ - -output runs/run.cw09b.bm25+ax.topics.web.151-200.txt \ - -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt \ - -output runs/run.cw09b.ql.topics.web.51-100.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt \ - -output runs/run.cw09b.ql.topics.web.101-150.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt \ - -output runs/run.cw09b.ql.topics.web.151-200.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt \ - -output runs/run.cw09b.ql+rm3.topics.web.51-100.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt \ - -output runs/run.cw09b.ql+rm3.topics.web.101-150.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt \ - -output runs/run.cw09b.ql+rm3.topics.web.151-200.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt \ - -output runs/run.cw09b.ql+ax.topics.web.51-100.txt \ - -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt \ - -output runs/run.cw09b.ql+ax.topics.web.101-150.txt \ - -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw09b.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt \ - -output runs/run.cw09b.ql+ax.topics.web.151-200.txt \ - -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25.topics.web.51-100.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25.topics.web.101-150.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25.topics.web.151-200.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25+rm3.topics.web.51-100.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25+rm3.topics.web.101-150.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25+rm3.topics.web.151-200.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25+ax.topics.web.51-100.txt \ + -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25+ax.topics.web.101-150.txt \ + -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt -topicreader Webxml \ + -output runs/run.cw09b.bm25+ax.topics.web.151-200.txt \ + -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt -topicreader Webxml \ + -output runs/run.cw09b.ql.topics.web.51-100.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt -topicreader Webxml \ + -output runs/run.cw09b.ql.topics.web.101-150.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt -topicreader Webxml \ + -output runs/run.cw09b.ql.topics.web.151-200.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt -topicreader Webxml \ + -output runs/run.cw09b.ql+rm3.topics.web.51-100.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt -topicreader Webxml \ + -output runs/run.cw09b.ql+rm3.topics.web.101-150.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt -topicreader Webxml \ + -output runs/run.cw09b.ql+rm3.topics.web.151-200.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.51-100.txt -topicreader Webxml \ + -output runs/run.cw09b.ql+ax.topics.web.51-100.txt \ + -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.101-150.txt -topicreader Webxml \ + -output runs/run.cw09b.ql+ax.topics.web.101-150.txt \ + -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw09b \ + -topics src/main/resources/topics-and-qrels/topics.web.151-200.txt -topicreader Webxml \ + -output runs/run.cw09b.ql+ax.topics.web.151-200.txt \ + -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval` and `gdeval.pl`: @@ -180,14 +199,14 @@ P30 | BM25 | +RM3 | +Ax | QL [TREC 2012 Web Track (Topics 151-200)](../src/main/resources/topics-and-qrels/topics.web.151-200.txt)| 0.2167 | 0.1927 | 0.2547 | 0.2080 | 0.1980 | 0.2220 | -NDCG20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +nDCG@20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2010 Web Track (Topics 51-100)](../src/main/resources/topics-and-qrels/topics.web.51-100.txt)| 0.1351 | 0.1368 | 0.1767 | 0.1143 | 0.1182 | 0.1495 | [TREC 2011 Web Track (Topics 101-150)](../src/main/resources/topics-and-qrels/topics.web.101-150.txt)| 0.1894 | 0.1915 | 0.1854 | 0.1631 | 0.1449 | 0.1537 | [TREC 2012 Web Track (Topics 151-200)](../src/main/resources/topics-and-qrels/topics.web.151-200.txt)| 0.1015 | 0.0918 | 0.1388 | 0.0875 | 0.0896 | 0.1091 | -ERR20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +ERR@20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2010 Web Track (Topics 51-100)](../src/main/resources/topics-and-qrels/topics.web.51-100.txt)| 0.0733 | 0.0747 | 0.1019 | 0.0599 | 0.0592 | 0.0751 | [TREC 2011 Web Track (Topics 101-150)](../src/main/resources/topics-and-qrels/topics.web.101-150.txt)| 0.0959 | 0.0959 | 0.0950 | 0.0850 | 0.0787 | 0.0861 | diff --git a/docs/regressions-cw12.md b/docs/regressions-cw12.md index c90432c103..1497c025af 100644 --- a/docs/regressions-cw12.md +++ b/docs/regressions-cw12.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection ClueWeb12Collection \ - -input /path/to/cw12 \ - -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 44 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection ClueWeb12Collection \ + -input /path/to/cw12 \ + -index indexes/lucene-index.cw12 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 44 -storePositions -storeDocvectors -storeRaw \ >& logs/log.cw12 & ``` @@ -33,41 +34,49 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12.bm25.topics.web.201-250.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12.bm25.topics.web.251-300.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12.bm25+rm3.topics.web.201-250.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12.bm25+rm3.topics.web.251-300.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12.ql.topics.web.201-250.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12.ql.topics.web.251-300.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12.ql+rm3.topics.web.201-250.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12.ql+rm3.topics.web.251-300.txt \ - -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12.bm25.topics.web.201-250.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12.bm25.topics.web.251-300.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12.bm25+rm3.topics.web.201-250.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12.bm25+rm3.topics.web.251-300.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12.ql.topics.web.201-250.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12.ql.topics.web.251-300.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12.ql+rm3.topics.web.201-250.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12.ql+rm3.topics.web.251-300.txt \ + -qld -rm3 & ``` Evaluation can be performed using `trec_eval` and `gdeval.pl`: @@ -110,13 +119,13 @@ P30 | BM25 | +RM3 | QL | +R [TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)| 0.4547 | 0.4080 | 0.4380 | 0.3813 | -NDCG20 | BM25 | +RM3 | QL | +RM3 | +nDCG@20 | BM25 | +RM3 | QL | +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [TREC 2013 Web Track (Topics 201-250)](../src/main/resources/topics-and-qrels/topics.web.201-250.txt)| 0.2085 | 0.2033 | 0.1993 | 0.1725 | [TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)| 0.2572 | 0.2516 | 0.2220 | 0.2093 | -ERR20 | BM25 | +RM3 | QL | +RM3 | +ERR@20 | BM25 | +RM3 | QL | +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [TREC 2013 Web Track (Topics 201-250)](../src/main/resources/topics-and-qrels/topics.web.201-250.txt)| 0.1283 | 0.1265 | 0.1233 | 0.1007 | [TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)| 0.1616 | 0.1652 | 0.1323 | 0.1249 | diff --git a/docs/regressions-cw12b13.md b/docs/regressions-cw12b13.md index 3c16398953..eef15ab17f 100644 --- a/docs/regressions-cw12b13.md +++ b/docs/regressions-cw12b13.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection ClueWeb12Collection \ - -input /path/to/cw12b13 \ - -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 44 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection ClueWeb12Collection \ + -input /path/to/cw12b13 \ + -index indexes/lucene-index.cw12b13 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 44 -storePositions -storeDocvectors -storeRaw \ >& logs/log.cw12b13 & ``` @@ -33,59 +34,71 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12b13.bm25.topics.web.201-250.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12b13.bm25.topics.web.251-300.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12b13.bm25+rm3.topics.web.201-250.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12b13.bm25+rm3.topics.web.251-300.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12b13.bm25+ax.topics.web.201-250.txt \ - -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12b13.bm25+ax.topics.web.251-300.txt \ - -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12b13.ql.topics.web.201-250.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12b13.ql.topics.web.251-300.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12b13.ql+rm3.topics.web.201-250.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12b13.ql+rm3.topics.web.251-300.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt \ - -output runs/run.cw12b13.ql+ax.topics.web.201-250.txt \ - -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.cw12b13.pos+docvectors+raw \ - -topicreader Webxml -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt \ - -output runs/run.cw12b13.ql+ax.topics.web.251-300.txt \ - -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12b13.bm25.topics.web.201-250.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12b13.bm25.topics.web.251-300.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12b13.bm25+rm3.topics.web.201-250.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12b13.bm25+rm3.topics.web.251-300.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12b13.bm25+ax.topics.web.201-250.txt \ + -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12b13.bm25+ax.topics.web.251-300.txt \ + -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12b13.ql.topics.web.201-250.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12b13.ql.topics.web.251-300.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12b13.ql+rm3.topics.web.201-250.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12b13.ql+rm3.topics.web.251-300.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.201-250.txt -topicreader Webxml \ + -output runs/run.cw12b13.ql+ax.topics.web.201-250.txt \ + -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.cw12b13 \ + -topics src/main/resources/topics-and-qrels/topics.web.251-300.txt -topicreader Webxml \ + -output runs/run.cw12b13.ql+ax.topics.web.251-300.txt \ + -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval` and `gdeval.pl`: @@ -138,13 +151,13 @@ P30 | BM25 | +RM3 | +Ax | QL [TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)| 0.1273 | 0.1207 | 0.1107 | 0.1373 | 0.1173 | 0.1147 | -NDCG20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +nDCG@20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2013 Web Track (Topics 201-250)](../src/main/resources/topics-and-qrels/topics.web.201-250.txt)| 0.1289 | 0.1114 | 0.1311 | 0.1104 | 0.0921 | 0.1113 | [TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)| 0.1183 | 0.1075 | 0.0974 | 0.1176 | 0.1004 | 0.0984 | -ERR20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | +ERR@20 | BM25 | +RM3 | +Ax | QL | +RM3 | +Ax | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [TREC 2013 Web Track (Topics 201-250)](../src/main/resources/topics-and-qrels/topics.web.201-250.txt)| 0.0838 | 0.0752 | 0.0949 | 0.0767 | 0.0552 | 0.0720 | [TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)| 0.1198 | 0.1055 | 0.0925 | 0.1091 | 0.0928 | 0.0879 | diff --git a/docs/regressions-disk12.md b/docs/regressions-disk12.md index d4e7332806..4a3daa9bb3 100644 --- a/docs/regressions-disk12.md +++ b/docs/regressions-disk12.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TrecCollection \ - -input /path/to/disk12 \ - -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection TrecCollection \ + -input /path/to/disk12 \ + -index indexes/lucene-index.disk12 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.disk12 & ``` @@ -35,83 +36,101 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt \ - -output runs/run.disk12.bm25.topics.adhoc.51-100.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt \ - -output runs/run.disk12.bm25.topics.adhoc.101-150.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt \ - -output runs/run.disk12.bm25.topics.adhoc.151-200.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt \ - -output runs/run.disk12.bm25+rm3.topics.adhoc.51-100.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt \ - -output runs/run.disk12.bm25+rm3.topics.adhoc.101-150.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt \ - -output runs/run.disk12.bm25+rm3.topics.adhoc.151-200.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt \ - -output runs/run.disk12.bm25+ax.topics.adhoc.51-100.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt \ - -output runs/run.disk12.bm25+ax.topics.adhoc.101-150.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt \ - -output runs/run.disk12.bm25+ax.topics.adhoc.151-200.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt \ - -output runs/run.disk12.ql.topics.adhoc.51-100.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt \ - -output runs/run.disk12.ql.topics.adhoc.101-150.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt \ - -output runs/run.disk12.ql.topics.adhoc.151-200.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt \ - -output runs/run.disk12.ql+rm3.topics.adhoc.51-100.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt \ - -output runs/run.disk12.ql+rm3.topics.adhoc.101-150.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt \ - -output runs/run.disk12.ql+rm3.topics.adhoc.151-200.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt \ - -output runs/run.disk12.ql+ax.topics.adhoc.51-100.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt \ - -output runs/run.disk12.ql+ax.topics.adhoc.101-150.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk12.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt \ - -output runs/run.disk12.ql+ax.topics.adhoc.151-200.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt -topicreader Trec \ + -output runs/run.disk12.bm25.topics.adhoc.51-100.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt -topicreader Trec \ + -output runs/run.disk12.bm25.topics.adhoc.101-150.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt -topicreader Trec \ + -output runs/run.disk12.bm25.topics.adhoc.151-200.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt -topicreader Trec \ + -output runs/run.disk12.bm25+rm3.topics.adhoc.51-100.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt -topicreader Trec \ + -output runs/run.disk12.bm25+rm3.topics.adhoc.101-150.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt -topicreader Trec \ + -output runs/run.disk12.bm25+rm3.topics.adhoc.151-200.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt -topicreader Trec \ + -output runs/run.disk12.bm25+ax.topics.adhoc.51-100.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt -topicreader Trec \ + -output runs/run.disk12.bm25+ax.topics.adhoc.101-150.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt -topicreader Trec \ + -output runs/run.disk12.bm25+ax.topics.adhoc.151-200.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt -topicreader Trec \ + -output runs/run.disk12.ql.topics.adhoc.51-100.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt -topicreader Trec \ + -output runs/run.disk12.ql.topics.adhoc.101-150.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt -topicreader Trec \ + -output runs/run.disk12.ql.topics.adhoc.151-200.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt -topicreader Trec \ + -output runs/run.disk12.ql+rm3.topics.adhoc.51-100.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt -topicreader Trec \ + -output runs/run.disk12.ql+rm3.topics.adhoc.101-150.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt -topicreader Trec \ + -output runs/run.disk12.ql+rm3.topics.adhoc.151-200.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt -topicreader Trec \ + -output runs/run.disk12.ql+ax.topics.adhoc.51-100.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.101-150.txt -topicreader Trec \ + -output runs/run.disk12.ql+ax.topics.adhoc.101-150.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk12 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.151-200.txt -topicreader Trec \ + -output runs/run.disk12.ql+ax.topics.adhoc.151-200.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-disk45.md b/docs/regressions-disk45.md index fb88c6e2b4..d4d34013a4 100644 --- a/docs/regressions-disk45.md +++ b/docs/regressions-disk45.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](${template}) Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TrecCollection \ - -input /path/to/disk45 \ - -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection TrecCollection \ + -input /path/to/disk45 \ + -index indexes/lucene-index.disk45 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.disk45 & ``` @@ -34,83 +35,101 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt \ - -output runs/run.disk45.bm25.topics.adhoc.351-400.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt \ - -output runs/run.disk45.bm25.topics.adhoc.401-450.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust04.txt \ - -output runs/run.disk45.bm25.topics.robust04.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt \ - -output runs/run.disk45.bm25+rm3.topics.adhoc.351-400.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt \ - -output runs/run.disk45.bm25+rm3.topics.adhoc.401-450.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust04.txt \ - -output runs/run.disk45.bm25+rm3.topics.robust04.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt \ - -output runs/run.disk45.bm25+ax.topics.adhoc.351-400.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt \ - -output runs/run.disk45.bm25+ax.topics.adhoc.401-450.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust04.txt \ - -output runs/run.disk45.bm25+ax.topics.robust04.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt \ - -output runs/run.disk45.ql.topics.adhoc.351-400.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt \ - -output runs/run.disk45.ql.topics.adhoc.401-450.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust04.txt \ - -output runs/run.disk45.ql.topics.robust04.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt \ - -output runs/run.disk45.ql+rm3.topics.adhoc.351-400.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt \ - -output runs/run.disk45.ql+rm3.topics.adhoc.401-450.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust04.txt \ - -output runs/run.disk45.ql+rm3.topics.robust04.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt \ - -output runs/run.disk45.ql+ax.topics.adhoc.351-400.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt \ - -output runs/run.disk45.ql+ax.topics.adhoc.401-450.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.disk45.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust04.txt \ - -output runs/run.disk45.ql+ax.topics.robust04.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt -topicreader Trec \ + -output runs/run.disk45.bm25.topics.adhoc.351-400.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt -topicreader Trec \ + -output runs/run.disk45.bm25.topics.adhoc.401-450.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.robust04.txt -topicreader Trec \ + -output runs/run.disk45.bm25.topics.robust04.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt -topicreader Trec \ + -output runs/run.disk45.bm25+rm3.topics.adhoc.351-400.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt -topicreader Trec \ + -output runs/run.disk45.bm25+rm3.topics.adhoc.401-450.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.robust04.txt -topicreader Trec \ + -output runs/run.disk45.bm25+rm3.topics.robust04.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt -topicreader Trec \ + -output runs/run.disk45.bm25+ax.topics.adhoc.351-400.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt -topicreader Trec \ + -output runs/run.disk45.bm25+ax.topics.adhoc.401-450.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.robust04.txt -topicreader Trec \ + -output runs/run.disk45.bm25+ax.topics.robust04.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt -topicreader Trec \ + -output runs/run.disk45.ql.topics.adhoc.351-400.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt -topicreader Trec \ + -output runs/run.disk45.ql.topics.adhoc.401-450.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.robust04.txt -topicreader Trec \ + -output runs/run.disk45.ql.topics.robust04.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt -topicreader Trec \ + -output runs/run.disk45.ql+rm3.topics.adhoc.351-400.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt -topicreader Trec \ + -output runs/run.disk45.ql+rm3.topics.adhoc.401-450.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.robust04.txt -topicreader Trec \ + -output runs/run.disk45.ql+rm3.topics.robust04.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt -topicreader Trec \ + -output runs/run.disk45.ql+ax.topics.adhoc.351-400.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.401-450.txt -topicreader Trec \ + -output runs/run.disk45.ql+ax.topics.adhoc.401-450.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.disk45 \ + -topics src/main/resources/topics-and-qrels/topics.robust04.txt -topicreader Trec \ + -output runs/run.disk45.ql+ax.topics.robust04.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-dl19-doc-docTTTTTquery-per-doc.md b/docs/regressions-dl19-doc-docTTTTTquery-per-doc.md index 60c43d15ec..8ff81c8743 100644 --- a/docs/regressions-dl19-doc-docTTTTTquery-per-doc.md +++ b/docs/regressions-dl19-doc-docTTTTTquery-per-doc.md @@ -20,11 +20,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-doc \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-doc \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-doc & ``` @@ -41,25 +42,29 @@ The original data can be found [here](https://trec.nist.gov/data/deep2019.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default.topics.dl19-doc.txt \ - -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default+rm3.topics.dl19-doc.txt \ - -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned.topics.dl19-doc.txt \ - -bm25 -bm25.k1 4.68 -bm25.b 0.87 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned+rm3.topics.dl19-doc.txt \ - -bm25 -bm25.k1 4.68 -bm25.b 0.87 -rm3 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default.topics.dl19-doc.txt \ + -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default+rm3.topics.dl19-doc.txt \ + -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned.topics.dl19-doc.txt \ + -bm25 -bm25.k1 4.68 -bm25.b 0.87 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned+rm3.topics.dl19-doc.txt \ + -bm25 -bm25.k1 4.68 -bm25.b 0.87 -rm3 -hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -88,7 +93,7 @@ R@100 | BM25 (default)| +RM3 | BM25 (tune [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.4198 | 0.4465 | 0.3992 | 0.4119 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.5968 | 0.5895 | 0.5967 | 0.6075 | diff --git a/docs/regressions-dl19-doc-docTTTTTquery-per-passage.md b/docs/regressions-dl19-doc-docTTTTTquery-per-passage.md index 05100ab525..c9fafd645b 100644 --- a/docs/regressions-dl19-doc-docTTTTTquery-per-passage.md +++ b/docs/regressions-dl19-doc-docTTTTTquery-per-passage.md @@ -21,11 +21,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-passage \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-passage \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-passage & ``` @@ -42,25 +43,29 @@ The original data can be found [here](https://trec.nist.gov/data/deep2019.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default.topics.dl19-doc.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default+rm3.topics.dl19-doc.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned.topics.dl19-doc.txt \ - -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned+rm3.topics.dl19-doc.txt \ - -bm25 -bm25.k1 2.56 -bm25.b 0.59 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default.topics.dl19-doc.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default+rm3.topics.dl19-doc.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned.topics.dl19-doc.txt \ + -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned+rm3.topics.dl19-doc.txt \ + -bm25 -bm25.k1 2.56 -bm25.b 0.59 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -89,7 +94,7 @@ R@100 | BM25 (default)| +RM3 | BM25 (tune [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.4092 | 0.4394 | 0.4020 | 0.4235 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.6099 | 0.6318 | 0.6271 | 0.6256 | diff --git a/docs/regressions-dl19-doc-per-passage.md b/docs/regressions-dl19-doc-per-passage.md index a092a0cd7d..257d8bd76e 100644 --- a/docs/regressions-dl19-doc-per-passage.md +++ b/docs/regressions-dl19-doc-per-passage.md @@ -21,11 +21,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-per-passage \ - -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-per-passage \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-per-passage & ``` @@ -42,45 +43,53 @@ The original data can be found [here](https://trec.nist.gov/data/deep2019.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default.topics.dl19-doc.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+rm3.topics.dl19-doc.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+ax.topics.dl19-doc.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+prf.topics.dl19-doc.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned.topics.dl19-doc.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+rm3.topics.dl19-doc.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+ax.topics.dl19-doc.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+prf.topics.dl19-doc.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default.topics.dl19-doc.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+rm3.topics.dl19-doc.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+ax.topics.dl19-doc.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+prf.topics.dl19-doc.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned.topics.dl19-doc.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+rm3.topics.dl19-doc.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+ax.topics.dl19-doc.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+prf.topics.dl19-doc.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -117,7 +126,7 @@ R@100 | BM25 (default)| +RM3 | +Ax [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.3840 | 0.4356 | 0.4501 | 0.4477 | 0.3903 | 0.4126 | 0.4437 | 0.4362 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.5276 | 0.5750 | 0.5590 | 0.5591 | 0.5364 | 0.5379 | 0.5546 | 0.5478 | diff --git a/docs/regressions-dl19-doc.md b/docs/regressions-dl19-doc.md index f2f02eab5a..cd8ef510a6 100644 --- a/docs/regressions-dl19-doc.md +++ b/docs/regressions-dl19-doc.md @@ -20,11 +20,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/msmarco-doc \ - -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/msmarco-doc \ + -index indexes/lucene-index.msmarco-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc & ``` @@ -41,45 +42,53 @@ The original data can be found [here](https://trec.nist.gov/data/deep2019.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-default.topics.dl19-doc.txt \ - -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-default+rm3.topics.dl19-doc.txt \ - -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-default+ax.topics.dl19-doc.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-default+prf.topics.dl19-doc.txt \ - -bm25 -bm25prf -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-tuned.topics.dl19-doc.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-tuned+rm3.topics.dl19-doc.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-tuned+ax.topics.dl19-doc.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -axiom -axiom.deterministic -rerankCutoff 20 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt \ - -output runs/run.msmarco-doc.bm25-tuned+prf.topics.dl19-doc.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -bm25prf -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-default.topics.dl19-doc.txt \ + -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-default+rm3.topics.dl19-doc.txt \ + -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-default+ax.topics.dl19-doc.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-default+prf.topics.dl19-doc.txt \ + -bm25 -bm25prf -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned.topics.dl19-doc.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned+rm3.topics.dl19-doc.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned+ax.topics.dl19-doc.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -axiom -axiom.deterministic -rerankCutoff 20 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl19-doc.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned+prf.topics.dl19-doc.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -bm25prf -hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -116,7 +125,7 @@ R@100 | BM25 (default)| +RM3 | +Ax [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.3948 | 0.4189 | 0.3945 | 0.4004 | 0.3862 | 0.4193 | 0.4399 | 0.4287 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)| 0.5190 | 0.5169 | 0.4730 | 0.5105 | 0.5140 | 0.5485 | 0.5245 | 0.5280 | diff --git a/docs/regressions-dl19-passage-docTTTTTquery.md b/docs/regressions-dl19-passage-docTTTTTquery.md index 13991bdcbc..fceebed0f0 100644 --- a/docs/regressions-dl19-passage-docTTTTTquery.md +++ b/docs/regressions-dl19-passage-docTTTTTquery.md @@ -14,11 +14,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage-docTTTTTquery \ - -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage-docTTTTTquery \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage-docTTTTTquery & ``` @@ -36,35 +37,41 @@ The original data can be found [here](https://trec.nist.gov/data/deep2019.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-default.topics.dl19-passage.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-default+rm3.topics.dl19-passage.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned.topics.dl19-passage.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned+rm3.topics.dl19-passage.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2.topics.dl19-passage.txt \ - -bm25 -bm25.k1 2.18 -bm25.b 0.86 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2+rm3.topics.dl19-passage.txt \ - -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-default.topics.dl19-passage.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-default+rm3.topics.dl19-passage.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned.topics.dl19-passage.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned+rm3.topics.dl19-passage.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2.topics.dl19-passage.txt \ + -bm25 -bm25.k1 2.18 -bm25.b 0.86 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2+rm3.topics.dl19-passage.txt \ + -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 & ``` Evaluation can be performed using `trec_eval`: @@ -109,7 +116,7 @@ R@1000 | BM25 (default)| +RM3 | BM25 (tune [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)| 0.8310 | 0.8861 | 0.8269 | 0.8826 | 0.8134 | 0.8424 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)| 0.6417 | 0.6548 | 0.6482 | 0.6614 | 0.6336 | 0.6528 | diff --git a/docs/regressions-dl19-passage.md b/docs/regressions-dl19-passage.md index ed350b3242..7589791834 100644 --- a/docs/regressions-dl19-passage.md +++ b/docs/regressions-dl19-passage.md @@ -13,11 +13,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage \ - -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage \ + -index indexes/lucene-index.msmarco-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage & ``` @@ -35,45 +36,53 @@ The original data can be found [here](https://trec.nist.gov/data/deep2019.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-default.topics.dl19-passage.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-default+rm3.topics.dl19-passage.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-default+ax.topics.dl19-passage.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-default+prf.topics.dl19-passage.txt \ - -bm25 -bm25prf & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-tuned.topics.dl19-passage.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-tuned+rm3.topics.dl19-passage.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-tuned+ax.topics.dl19-passage.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt \ - -output runs/run.msmarco-passage.bm25-tuned+prf.topics.dl19-passage.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default.topics.dl19-passage.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+rm3.topics.dl19-passage.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+ax.topics.dl19-passage.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+prf.topics.dl19-passage.txt \ + -bm25 -bm25prf & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned.topics.dl19-passage.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+rm3.topics.dl19-passage.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+ax.topics.dl19-passage.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl19-passage.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+prf.topics.dl19-passage.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -126,7 +135,7 @@ R@1000 | BM25 (default)| +RM3 | +Ax [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)| 0.7501 | 0.7998 | 0.8241 | 0.7929 | 0.7450 | 0.7792 | 0.8138 | 0.7988 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)| 0.5058 | 0.5180 | 0.5511 | 0.5372 | 0.4973 | 0.5231 | 0.5461 | 0.5536 | diff --git a/docs/regressions-dl20-doc-docTTTTTquery-per-doc.md b/docs/regressions-dl20-doc-docTTTTTquery-per-doc.md index 9251fffa90..121e5d9de9 100644 --- a/docs/regressions-dl20-doc-docTTTTTquery-per-doc.md +++ b/docs/regressions-dl20-doc-docTTTTTquery-per-doc.md @@ -20,11 +20,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-doc \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-doc \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-doc & ``` @@ -41,25 +42,29 @@ The original data can be found [here](https://trec.nist.gov/data/deep2020.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default.topics.dl20.txt \ - -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default+rm3.topics.dl20.txt \ - -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned.topics.dl20.txt \ - -bm25 -bm25.k1 4.68 -bm25.b 0.87 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 4.68 -bm25.b 0.87 -rm3 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default.topics.dl20.txt \ + -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default+rm3.topics.dl20.txt \ + -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned.topics.dl20.txt \ + -bm25 -bm25.k1 4.68 -bm25.b 0.87 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 4.68 -bm25.b 0.87 -rm3 -hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -83,12 +88,12 @@ MAP | BM25 (default)| +RM3 | BM25 (tune [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.4230 | 0.4228 | 0.4098 | 0.4104 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.5885 | 0.5407 | 0.5852 | 0.5743 | -RR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | +MRR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.9369 | 0.8147 | 0.9439 | 0.8701 | diff --git a/docs/regressions-dl20-doc-docTTTTTquery-per-passage.md b/docs/regressions-dl20-doc-docTTTTTquery-per-passage.md index 9e4ea23a97..7b64bc21e5 100644 --- a/docs/regressions-dl20-doc-docTTTTTquery-per-passage.md +++ b/docs/regressions-dl20-doc-docTTTTTquery-per-passage.md @@ -21,11 +21,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-passage \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-passage \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-passage & ``` @@ -42,25 +43,29 @@ The original data can be found [here](https://trec.nist.gov/data/deep2020.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default.topics.dl20.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default+rm3.topics.dl20.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned.topics.dl20.txt \ - -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 2.56 -bm25.b 0.59 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default.topics.dl20.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default+rm3.topics.dl20.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned.topics.dl20.txt \ + -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 2.56 -bm25.b 0.59 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -84,12 +89,12 @@ MAP | BM25 (default)| +RM3 | BM25 (tune [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.4150 | 0.4269 | 0.4042 | 0.4023 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.5957 | 0.5848 | 0.5931 | 0.5723 | -RR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | +MRR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.9361 | 0.8944 | 0.9469 | 0.9150 | diff --git a/docs/regressions-dl20-doc-per-passage.md b/docs/regressions-dl20-doc-per-passage.md index 9d9aee7946..b587a3bf18 100644 --- a/docs/regressions-dl20-doc-per-passage.md +++ b/docs/regressions-dl20-doc-per-passage.md @@ -21,11 +21,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-per-passage \ - -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-per-passage \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-per-passage & ``` @@ -42,45 +43,53 @@ The original data can be found [here](https://trec.nist.gov/data/deep2020.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default.topics.dl20.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+rm3.topics.dl20.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+ax.topics.dl20.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+prf.topics.dl20.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned.topics.dl20.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+ax.topics.dl20.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+prf.topics.dl20.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default.topics.dl20.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+rm3.topics.dl20.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+ax.topics.dl20.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+prf.topics.dl20.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned.topics.dl20.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+ax.topics.dl20.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+prf.topics.dl20.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -112,12 +121,12 @@ MAP | BM25 (default)| +RM3 | +Ax [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.3584 | 0.3769 | 0.3854 | 0.3672 | 0.3456 | 0.3471 | 0.3495 | 0.3629 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.5271 | 0.5159 | 0.5250 | 0.5217 | 0.5213 | 0.4983 | 0.4942 | 0.5260 | -RR | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +MRR | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.8479 | 0.8136 | 0.8123 | 0.7911 | 0.8684 | 0.7807 | 0.8102 | 0.8478 | diff --git a/docs/regressions-dl20-doc.md b/docs/regressions-dl20-doc.md index f2dcf0be06..9d44ade798 100644 --- a/docs/regressions-dl20-doc.md +++ b/docs/regressions-dl20-doc.md @@ -20,11 +20,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/msmacro-doc \ - -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/msmacro-doc \ + -index indexes/lucene-index.msmarco-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmacro-doc & ``` @@ -41,35 +42,41 @@ The original data can be found [here](https://trec.nist.gov/data/deep2020.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmacro-doc.bm25-default.topics.dl20.txt \ - -bm25 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmacro-doc.bm25-default+rm3.topics.dl20.txt \ - -bm25 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmacro-doc.bm25-tuned.topics.dl20.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmacro-doc.bm25-tuned+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmacro-doc.bm25-tuned2.topics.dl20.txt \ - -bm25 -bm25.k1 4.46 -bm25.b 0.82 -hits 100 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmacro-doc.bm25-tuned2+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 4.46 -bm25.b 0.82 -rm3 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmacro-doc.bm25-default.topics.dl20.txt \ + -bm25 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmacro-doc.bm25-default+rm3.topics.dl20.txt \ + -bm25 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmacro-doc.bm25-tuned.topics.dl20.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmacro-doc.bm25-tuned+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmacro-doc.bm25-tuned2.topics.dl20.txt \ + -bm25 -bm25.k1 4.46 -bm25.b 0.82 -hits 100 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmacro-doc.bm25-tuned2+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 4.46 -bm25.b 0.82 -rm3 -hits 100 & ``` Evaluation can be performed using `trec_eval`: @@ -97,12 +104,12 @@ MAP | BM25 (default)| +RM3 | BM25 (tune [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.3791 | 0.4006 | 0.3630 | 0.3588 | 0.3583 | 0.3618 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.5271 | 0.5248 | 0.5087 | 0.5117 | 0.5078 | 0.5202 | -RR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | +MRR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)| 0.8521 | 0.8541 | 0.8641 | 0.8188 | 0.8541 | 0.8458 | diff --git a/docs/regressions-dl20-passage-docTTTTTquery.md b/docs/regressions-dl20-passage-docTTTTTquery.md index ddfa36b37c..7bfe7b4232 100644 --- a/docs/regressions-dl20-passage-docTTTTTquery.md +++ b/docs/regressions-dl20-passage-docTTTTTquery.md @@ -14,11 +14,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage-docTTTTTquery \ - -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage-docTTTTTquery \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage-docTTTTTquery & ``` @@ -36,35 +37,41 @@ The original data can be found [here](https://trec.nist.gov/data/deep2020.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-default.topics.dl20.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-default+rm3.topics.dl20.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned.topics.dl20.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2.topics.dl20.txt \ - -bm25 -bm25.k1 2.18 -bm25.b 0.86 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-default.topics.dl20.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-default+rm3.topics.dl20.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned.topics.dl20.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2.topics.dl20.txt \ + -bm25 -bm25.k1 2.18 -bm25.b 0.86 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 & ``` Evaluation can be performed using `trec_eval`: @@ -116,12 +123,12 @@ MAP | BM25 (default)| +RM3 | BM25 (tune [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)| 0.4074 | 0.4295 | 0.4082 | 0.4296 | 0.4171 | 0.4347 | -NDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | +nDCG@10 | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)| 0.6187 | 0.6172 | 0.6192 | 0.6177 | 0.6265 | 0.6232 | -RR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | +MRR | BM25 (default)| +RM3 | BM25 (tuned)| +RM3 | BM25 (tuned2)| +RM3 | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)| 0.7326 | 0.7424 | 0.7425 | 0.7422 | 0.7467 | 0.7327 | diff --git a/docs/regressions-dl20-passage.md b/docs/regressions-dl20-passage.md index 2baed9b4e5..6e99131df4 100644 --- a/docs/regressions-dl20-passage.md +++ b/docs/regressions-dl20-passage.md @@ -13,11 +13,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage \ - -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage \ + -index indexes/lucene-index.msmarco-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage & ``` @@ -35,45 +36,53 @@ The original data can be found [here](https://trec.nist.gov/data/deep2020.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-default.topics.dl20.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-default+rm3.topics.dl20.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-default+ax.topics.dl20.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-default+prf.topics.dl20.txt \ - -bm25 -bm25prf & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-tuned.topics.dl20.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-tuned+rm3.topics.dl20.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-tuned+ax.topics.dl20.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl20.txt \ - -output runs/run.msmarco-passage.bm25-tuned+prf.topics.dl20.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default.topics.dl20.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+rm3.topics.dl20.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+ax.topics.dl20.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+prf.topics.dl20.txt \ + -bm25 -bm25prf & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned.topics.dl20.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+rm3.topics.dl20.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+ax.topics.dl20.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl20.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+prf.topics.dl20.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -137,12 +146,12 @@ MAP | BM25 (default)| +RM3 | +Ax [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)| 0.2856 | 0.3019 | 0.3240 | 0.3117 | 0.2876 | 0.3056 | 0.3322 | 0.3136 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)| 0.4796 | 0.4821 | 0.4834 | 0.4721 | 0.4876 | 0.4808 | 0.5027 | 0.4788 | -RR | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | +MRR | BM25 (default)| +RM3 | +Ax | +PRF | BM25 (tuned)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)| 0.6585 | 0.6360 | 0.6096 | 0.6157 | 0.6594 | 0.6278 | 0.6328 | 0.6252 | diff --git a/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md b/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md index 592850d098..ff38bbb9f4 100644 --- a/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md +++ b/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md @@ -18,11 +18,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized \ >& logs/log.msmarco-v2-doc-segmented-unicoil-noexp-0shot & ``` @@ -39,10 +40,11 @@ The regression experiments here evaluate on the 57 topics for which NIST has pro After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.tsv.gz \ - -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.tsv.gz \ + -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: @@ -68,7 +70,7 @@ MRR@100 | uniCOIL (no expansion, zero-shot)| [DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.9122 | -NDCG@10 | uniCOIL (no expansion, zero-shot)| +nDCG@10 | uniCOIL (no expansion, zero-shot)| :---------------------------------------|-----------| [DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.6282 | diff --git a/docs/regressions-dl21-doc-segmented.md b/docs/regressions-dl21-doc-segmented.md index 4581a3e81f..c1d8021aeb 100644 --- a/docs/regressions-dl21-doc-segmented.md +++ b/docs/regressions-dl21-doc-segmented.md @@ -22,11 +22,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ - -input /path/to/msmarco-v2-doc-segmented \ - -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2DocCollection \ + -input /path/to/msmarco-v2-doc-segmented \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-doc-segmented & ``` @@ -44,25 +45,29 @@ The regression experiments here evaluate on the 57 topics for which NIST has pro After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default.topics.dl21.txt \ - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+rm3.topics.dl21.txt \ - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+ax.topics.dl21.txt \ - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+prf.topics.dl21.txt \ - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default.topics.dl21.txt \ + -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+rm3.topics.dl21.txt \ + -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+ax.topics.dl21.txt \ + -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+prf.topics.dl21.txt \ + -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -103,7 +108,7 @@ MRR@100 | BM25 (default)| +RM3 | +Ax [DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.8937 | 0.9018 | 0.9221 | 0.9146 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.5776 | 0.6185 | 0.5840 | 0.5936 | diff --git a/docs/regressions-dl21-doc.md b/docs/regressions-dl21-doc.md index dbebb16644..1fddb1bd49 100644 --- a/docs/regressions-dl21-doc.md +++ b/docs/regressions-dl21-doc.md @@ -22,11 +22,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ - -input /path/to/msmarco-v2-doc \ - -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2DocCollection \ + -input /path/to/msmarco-v2-doc \ + -index indexes/lucene-index.msmarco-v2-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-doc & ``` @@ -44,25 +45,29 @@ The regression experiments here evaluate on the 57 topics for which NIST has pro After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc.bm25-default.topics.dl21.txt \ - -hits 1000 -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+rm3.topics.dl21.txt \ - -hits 1000 -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+ax.topics.dl21.txt \ - -hits 1000 -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+prf.topics.dl21.txt \ - -hits 1000 -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default.topics.dl21.txt \ + -hits 1000 -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+rm3.topics.dl21.txt \ + -hits 1000 -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+ax.topics.dl21.txt \ + -hits 1000 -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+prf.topics.dl21.txt \ + -hits 1000 -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -103,7 +108,7 @@ MRR@100 | BM25 (default)| +RM3 | +Ax [DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.8367 | 0.7994 | 0.7434 | 0.7869 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.5116 | 0.5339 | 0.4804 | 0.4850 | diff --git a/docs/regressions-dl21-passage-augmented.md b/docs/regressions-dl21-passage-augmented.md index ee0bf151d0..baf66c709d 100644 --- a/docs/regressions-dl21-passage-augmented.md +++ b/docs/regressions-dl21-passage-augmented.md @@ -17,11 +17,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ - -input /path/to/msmarco-v2-passage-augmented \ - -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2PassageCollection \ + -input /path/to/msmarco-v2-passage-augmented \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-passage-augmented & ``` @@ -39,25 +40,29 @@ The regression experiments here evaluate on the 53 topics for which NIST has pro After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default.topics.dl21.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+rm3.topics.dl21.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+ax.topics.dl21.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+prf.topics.dl21.txt \ - -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default.topics.dl21.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+rm3.topics.dl21.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+ax.topics.dl21.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+prf.topics.dl21.txt \ + -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -102,7 +107,7 @@ MRR@100 | BM25 (default)| +RM3 | +Ax [DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.5303 | 0.4906 | 0.4269 | 0.4737 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.3977 | 0.3906 | 0.2927 | 0.3488 | diff --git a/docs/regressions-dl21-passage-unicoil-noexp-0shot.md b/docs/regressions-dl21-passage-unicoil-noexp-0shot.md index 9d620ebddb..ece55f7d87 100644 --- a/docs/regressions-dl21-passage-unicoil-noexp-0shot.md +++ b/docs/regressions-dl21-passage-unicoil-noexp-0shot.md @@ -18,11 +18,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-v2-passage-unicoil-noexp-0shot \ - -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2-passage-unicoil-noexp-0shot \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized \ >& logs/log.msmarco-v2-passage-unicoil-noexp-0shot & ``` @@ -39,10 +40,11 @@ The regression experiments here evaluate on the 53 topics for which NIST has pro After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.tsv.gz \ - -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ + -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.tsv.gz \ + -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: @@ -69,7 +71,7 @@ MRR@100 | uniCOIL (no expansion, zero-shot)| [DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.6991 | -NDCG@10 | uniCOIL (no expansion, zero-shot)| +nDCG@10 | uniCOIL (no expansion, zero-shot)| :---------------------------------------|-----------| [DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.5756 | diff --git a/docs/regressions-dl21-passage.md b/docs/regressions-dl21-passage.md index c067b8bcf0..fdcd1ca289 100644 --- a/docs/regressions-dl21-passage.md +++ b/docs/regressions-dl21-passage.md @@ -17,11 +17,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ - -input /path/to/msmarco-v2-passage \ - -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2PassageCollection \ + -input /path/to/msmarco-v2-passage \ + -index indexes/lucene-index.msmarco-v2-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-passage & ``` @@ -39,25 +40,29 @@ The regression experiments here evaluate on the 53 topics for which NIST has pro After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage.bm25-default.topics.dl21.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+rm3.topics.dl21.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+ax.topics.dl21.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.dl21.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+prf.topics.dl21.txt \ - -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default.topics.dl21.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+rm3.topics.dl21.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+ax.topics.dl21.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.dl21.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+prf.topics.dl21.txt \ + -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -102,7 +107,7 @@ MRR@100 | BM25 (default)| +RM3 | +Ax [DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.5060 | 0.4925 | 0.5733 | 0.5532 | -NDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | +nDCG@10 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)| 0.4458 | 0.4480 | 0.4851 | 0.4740 | diff --git a/docs/regressions-fever.md b/docs/regressions-fever.md index fa2246aa85..0f1d341a1c 100644 --- a/docs/regressions-fever.md +++ b/docs/regressions-fever.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection FeverParagraphCollection \ - -input /path/to/fever \ - -index indexes/lucene-index.fever-paragraph \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection FeverParagraphCollection \ + -input /path/to/fever \ + -index indexes/lucene-index.fever-paragraph \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.fever & ``` @@ -31,15 +32,17 @@ The original data can be found [here](https://fever.ai/resources.html). After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.fever-paragraph \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.fever.dev.txt \ - -output runs/run.fever.bm25-default.topics.fever.dev.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.fever-paragraph \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.fever.dev.txt \ - -output runs/run.fever.bm25-tuned.topics.fever.dev.txt \ - -bm25 -bm25.k1 0.9 -bm25.b 0.1 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.fever-paragraph \ + -topics src/main/resources/topics-and-qrels/topics.fever.dev.txt -topicreader TsvInt \ + -output runs/run.fever.bm25-default.topics.fever.dev.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.fever-paragraph \ + -topics src/main/resources/topics-and-qrels/topics.fever.dev.txt -topicreader TsvInt \ + -output runs/run.fever.bm25-tuned.topics.fever.dev.txt \ + -bm25 -bm25.k1 0.9 -bm25.b 0.1 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-fire12-bn.md b/docs/regressions-fire12-bn.md index f5a5498ffc..635d26dad4 100644 --- a/docs/regressions-fire12-bn.md +++ b/docs/regressions-fire12-bn.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/fire12-bn \ - -index indexes/lucene-index.fire12-bn.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw -language bn \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/fire12-bn \ + -index indexes/lucene-index.fire12-bn \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language bn \ >& logs/log.fire12-bn & ``` @@ -34,10 +35,11 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.fire12-bn.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.fire12bn.176-225.txt \ - -output runs/run.fire12-bn.bm25.topics.fire12bn.176-225.txt \ - -language bn -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.fire12-bn \ + -topics src/main/resources/topics-and-qrels/topics.fire12bn.176-225.txt -topicreader Trec \ + -output runs/run.fire12-bn.bm25.topics.fire12bn.176-225.txt \ + -bm25 -language bn & ``` Evaluation can be performed using `trec_eval`: @@ -60,6 +62,6 @@ P20 | BM25 | [FIRE 2012 (Monolingual Bengali)](../src/main/resources/topics-and-qrels/topics.fire12bn.176-225.txt)| 0.3740 | -NDCG20 | BM25 | +nDCG@20 | BM25 | :---------------------------------------|-----------| [FIRE 2012 (Monolingual Bengali)](../src/main/resources/topics-and-qrels/topics.fire12bn.176-225.txt)| 0.4261 | diff --git a/docs/regressions-fire12-en.md b/docs/regressions-fire12-en.md index 7edff92ca7..4224afa714 100644 --- a/docs/regressions-fire12-en.md +++ b/docs/regressions-fire12-en.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/fire12-en \ - -index indexes/lucene-index.fire12-en.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw -language en \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/fire12-en \ + -index indexes/lucene-index.fire12-en \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language en \ >& logs/log.fire12-en & ``` @@ -34,10 +35,11 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.fire12-en.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt \ - -output runs/run.fire12-en.bm25.topics.fire12en.176-225.txt \ - -language en -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.fire12-en \ + -topics src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt -topicreader Trec \ + -output runs/run.fire12-en.bm25.topics.fire12en.176-225.txt \ + -bm25 -language en & ``` Evaluation can be performed using `trec_eval`: @@ -60,6 +62,6 @@ P20 | BM25 | [FIRE 2012 (Monolingual English)](../src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt)| 0.4970 | -NDCG20 | BM25 | +nDCG@20 | BM25 | :---------------------------------------|-----------| [FIRE 2012 (Monolingual English)](../src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt)| 0.5420 | diff --git a/docs/regressions-fire12-hi.md b/docs/regressions-fire12-hi.md index f706b0c947..a36557e2d7 100644 --- a/docs/regressions-fire12-hi.md +++ b/docs/regressions-fire12-hi.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/fire12-hi \ - -index indexes/lucene-index.fire12-hi.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw -language hi \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/fire12-hi \ + -index indexes/lucene-index.fire12-hi \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language hi \ >& logs/log.fire12-hi & ``` @@ -34,10 +35,11 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.fire12-hi.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.fire12hi.176-225.txt \ - -output runs/run.fire12-hi.bm25.topics.fire12hi.176-225.txt \ - -language hi -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.fire12-hi \ + -topics src/main/resources/topics-and-qrels/topics.fire12hi.176-225.txt -topicreader Trec \ + -output runs/run.fire12-hi.bm25.topics.fire12hi.176-225.txt \ + -bm25 -language hi & ``` Evaluation can be performed using `trec_eval`: @@ -60,6 +62,6 @@ P20 | BM25 | [FIRE 2012 (Monolingual Hindi)](../src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt)| 0.4470 | -NDCG20 | BM25 | +nDCG@20 | BM25 | :---------------------------------------|-----------| [FIRE 2012 (Monolingual Hindi)](../src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt)| 0.5310 | diff --git a/docs/regressions-gov2.md b/docs/regressions-gov2.md index b176f697ee..980aa0a037 100644 --- a/docs/regressions-gov2.md +++ b/docs/regressions-gov2.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TrecwebCollection \ - -input /path/to/gov2 \ - -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 44 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection TrecwebCollection \ + -input /path/to/gov2 \ + -index indexes/lucene-index.gov2 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 44 -storePositions -storeDocvectors -storeRaw \ >& logs/log.gov2 & ``` @@ -35,83 +36,101 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt \ - -output runs/run.gov2.bm25.topics.terabyte04.701-750.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt \ - -output runs/run.gov2.bm25.topics.terabyte05.751-800.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt \ - -output runs/run.gov2.bm25.topics.terabyte06.801-850.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt \ - -output runs/run.gov2.bm25+rm3.topics.terabyte04.701-750.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt \ - -output runs/run.gov2.bm25+rm3.topics.terabyte05.751-800.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt \ - -output runs/run.gov2.bm25+rm3.topics.terabyte06.801-850.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt \ - -output runs/run.gov2.bm25+ax.topics.terabyte04.701-750.txt \ - -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt \ - -output runs/run.gov2.bm25+ax.topics.terabyte05.751-800.txt \ - -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt \ - -output runs/run.gov2.bm25+ax.topics.terabyte06.801-850.txt \ - -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt \ - -output runs/run.gov2.ql.topics.terabyte04.701-750.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt \ - -output runs/run.gov2.ql.topics.terabyte05.751-800.txt \ - -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt \ - -output runs/run.gov2.ql.topics.terabyte06.801-850.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt \ - -output runs/run.gov2.ql+rm3.topics.terabyte04.701-750.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt \ - -output runs/run.gov2.ql+rm3.topics.terabyte05.751-800.txt \ - -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt \ - -output runs/run.gov2.ql+rm3.topics.terabyte06.801-850.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt \ - -output runs/run.gov2.ql+ax.topics.terabyte04.701-750.txt \ - -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt \ - -output runs/run.gov2.ql+ax.topics.terabyte05.751-800.txt \ - -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.gov2.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt \ - -output runs/run.gov2.ql+ax.topics.terabyte06.801-850.txt \ - -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt -topicreader Trec \ + -output runs/run.gov2.bm25.topics.terabyte04.701-750.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt -topicreader Trec \ + -output runs/run.gov2.bm25.topics.terabyte05.751-800.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt -topicreader Trec \ + -output runs/run.gov2.bm25.topics.terabyte06.801-850.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt -topicreader Trec \ + -output runs/run.gov2.bm25+rm3.topics.terabyte04.701-750.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt -topicreader Trec \ + -output runs/run.gov2.bm25+rm3.topics.terabyte05.751-800.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt -topicreader Trec \ + -output runs/run.gov2.bm25+rm3.topics.terabyte06.801-850.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt -topicreader Trec \ + -output runs/run.gov2.bm25+ax.topics.terabyte04.701-750.txt \ + -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt -topicreader Trec \ + -output runs/run.gov2.bm25+ax.topics.terabyte05.751-800.txt \ + -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt -topicreader Trec \ + -output runs/run.gov2.bm25+ax.topics.terabyte06.801-850.txt \ + -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt -topicreader Trec \ + -output runs/run.gov2.ql.topics.terabyte04.701-750.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt -topicreader Trec \ + -output runs/run.gov2.ql.topics.terabyte05.751-800.txt \ + -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt -topicreader Trec \ + -output runs/run.gov2.ql.topics.terabyte06.801-850.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt -topicreader Trec \ + -output runs/run.gov2.ql+rm3.topics.terabyte04.701-750.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt -topicreader Trec \ + -output runs/run.gov2.ql+rm3.topics.terabyte05.751-800.txt \ + -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt -topicreader Trec \ + -output runs/run.gov2.ql+rm3.topics.terabyte06.801-850.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt -topicreader Trec \ + -output runs/run.gov2.ql+ax.topics.terabyte04.701-750.txt \ + -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt -topicreader Trec \ + -output runs/run.gov2.ql+ax.topics.terabyte05.751-800.txt \ + -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.gov2 \ + -topics src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt -topicreader Trec \ + -output runs/run.gov2.ql+ax.topics.terabyte06.801-850.txt \ + -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mb11.md b/docs/regressions-mb11.md index bd220eace8..812c8b23d6 100644 --- a/docs/regressions-mb11.md +++ b/docs/regressions-mb11.md @@ -12,11 +12,12 @@ effectiveness results you'll get should be similar, but will likely not be ident Indexing the Tweets2011 collection: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TweetCollection \ - -input /path/to/mb11 \ - -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -generator TweetGenerator \ - -threads 44 -storePositions -storeDocvectors -storeRaw -uniqueDocid -tweet.keepUrls -tweet.stemming \ +target/appassembler/bin/IndexCollection \ + -collection TweetCollection \ + -input /path/to/mb11 \ + -index indexes/lucene-index.mb11 \ + -generator TweetGenerator \ + -threads 44 -storePositions -storeDocvectors -storeRaw -uniqueDocid -tweet.keepUrls -tweet.stemming \ >& logs/log.mb11 & ``` @@ -41,59 +42,71 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt \ - -output runs/run.mb11.bm25.topics.microblog2011.txt \ - -searchtweets -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt \ - -output runs/run.mb11.bm25.topics.microblog2012.txt \ - -searchtweets -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt \ - -output runs/run.mb11.bm25+rm3.topics.microblog2011.txt \ - -searchtweets -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt \ - -output runs/run.mb11.bm25+rm3.topics.microblog2012.txt \ - -searchtweets -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt \ - -output runs/run.mb11.bm25+ax.topics.microblog2011.txt \ - -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt \ - -output runs/run.mb11.bm25+ax.topics.microblog2012.txt \ - -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt \ - -output runs/run.mb11.ql.topics.microblog2011.txt \ - -searchtweets -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt \ - -output runs/run.mb11.ql.topics.microblog2012.txt \ - -searchtweets -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt \ - -output runs/run.mb11.ql+rm3.topics.microblog2011.txt \ - -searchtweets -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt \ - -output runs/run.mb11.ql+rm3.topics.microblog2012.txt \ - -searchtweets -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt \ - -output runs/run.mb11.ql+ax.topics.microblog2011.txt \ - -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb11.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt \ - -output runs/run.mb11.ql+ax.topics.microblog2012.txt \ - -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt -topicreader Microblog \ + -output runs/run.mb11.bm25.topics.microblog2011.txt \ + -searchtweets -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt -topicreader Microblog \ + -output runs/run.mb11.bm25.topics.microblog2012.txt \ + -searchtweets -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt -topicreader Microblog \ + -output runs/run.mb11.bm25+rm3.topics.microblog2011.txt \ + -searchtweets -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt -topicreader Microblog \ + -output runs/run.mb11.bm25+rm3.topics.microblog2012.txt \ + -searchtweets -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt -topicreader Microblog \ + -output runs/run.mb11.bm25+ax.topics.microblog2011.txt \ + -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt -topicreader Microblog \ + -output runs/run.mb11.bm25+ax.topics.microblog2012.txt \ + -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt -topicreader Microblog \ + -output runs/run.mb11.ql.topics.microblog2011.txt \ + -searchtweets -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt -topicreader Microblog \ + -output runs/run.mb11.ql.topics.microblog2012.txt \ + -searchtweets -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt -topicreader Microblog \ + -output runs/run.mb11.ql+rm3.topics.microblog2011.txt \ + -searchtweets -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt -topicreader Microblog \ + -output runs/run.mb11.ql+rm3.topics.microblog2012.txt \ + -searchtweets -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2011.txt -topicreader Microblog \ + -output runs/run.mb11.ql+ax.topics.microblog2011.txt \ + -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb11 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2012.txt -topicreader Microblog \ + -output runs/run.mb11.ql+ax.topics.microblog2012.txt \ + -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mb13.md b/docs/regressions-mb13.md index 3ccb5c81a4..b3b5692d2a 100644 --- a/docs/regressions-mb13.md +++ b/docs/regressions-mb13.md @@ -12,11 +12,12 @@ effectiveness results you'll get should be similar, but will likely not be ident Indexing the Tweets2013 collection: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TweetCollection \ - -input /path/to/mb13 \ - -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -generator TweetGenerator \ - -threads 44 -storePositions -storeDocvectors -storeRaw -uniqueDocid -optimize -tweet.keepUrls -tweet.stemming \ +target/appassembler/bin/IndexCollection \ + -collection TweetCollection \ + -input /path/to/mb13 \ + -index indexes/lucene-index.mb13 \ + -generator TweetGenerator \ + -threads 44 -storePositions -storeDocvectors -storeRaw -uniqueDocid -optimize -tweet.keepUrls -tweet.stemming \ >& logs/log.mb13 & ``` @@ -41,59 +42,71 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt \ - -output runs/run.mb13.bm25.topics.microblog2013.txt \ - -searchtweets -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt \ - -output runs/run.mb13.bm25.topics.microblog2014.txt \ - -searchtweets -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt \ - -output runs/run.mb13.bm25+rm3.topics.microblog2013.txt \ - -searchtweets -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt \ - -output runs/run.mb13.bm25+rm3.topics.microblog2014.txt \ - -searchtweets -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt \ - -output runs/run.mb13.bm25+ax.topics.microblog2013.txt \ - -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt \ - -output runs/run.mb13.bm25+ax.topics.microblog2014.txt \ - -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt \ - -output runs/run.mb13.ql.topics.microblog2013.txt \ - -searchtweets -qld & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt \ - -output runs/run.mb13.ql.topics.microblog2014.txt \ - -searchtweets -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt \ - -output runs/run.mb13.ql+rm3.topics.microblog2013.txt \ - -searchtweets -qld -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt \ - -output runs/run.mb13.ql+rm3.topics.microblog2014.txt \ - -searchtweets -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt \ - -output runs/run.mb13.ql+ax.topics.microblog2013.txt \ - -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mb13.pos+docvectors+raw \ - -topicreader Microblog -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt \ - -output runs/run.mb13.ql+ax.topics.microblog2014.txt \ - -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt -topicreader Microblog \ + -output runs/run.mb13.bm25.topics.microblog2013.txt \ + -searchtweets -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt -topicreader Microblog \ + -output runs/run.mb13.bm25.topics.microblog2014.txt \ + -searchtweets -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt -topicreader Microblog \ + -output runs/run.mb13.bm25+rm3.topics.microblog2013.txt \ + -searchtweets -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt -topicreader Microblog \ + -output runs/run.mb13.bm25+rm3.topics.microblog2014.txt \ + -searchtweets -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt -topicreader Microblog \ + -output runs/run.mb13.bm25+ax.topics.microblog2013.txt \ + -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt -topicreader Microblog \ + -output runs/run.mb13.bm25+ax.topics.microblog2014.txt \ + -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt -topicreader Microblog \ + -output runs/run.mb13.ql.topics.microblog2013.txt \ + -searchtweets -qld & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt -topicreader Microblog \ + -output runs/run.mb13.ql.topics.microblog2014.txt \ + -searchtweets -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt -topicreader Microblog \ + -output runs/run.mb13.ql+rm3.topics.microblog2013.txt \ + -searchtweets -qld -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt -topicreader Microblog \ + -output runs/run.mb13.ql+rm3.topics.microblog2014.txt \ + -searchtweets -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2013.txt -topicreader Microblog \ + -output runs/run.mb13.ql+ax.topics.microblog2013.txt \ + -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mb13 \ + -topics src/main/resources/topics-and-qrels/topics.microblog2014.txt -topicreader Microblog \ + -output runs/run.mb13.ql+ax.topics.microblog2014.txt \ + -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-ar.md b/docs/regressions-mrtydi-v1.1-ar.md index 178d92638a..e1baa1fa03 100644 --- a/docs/regressions-mrtydi-v1.1-ar.md +++ b/docs/regressions-mrtydi-v1.1-ar.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-ar \ - -index indexes/lucene-index.mrtydi-v1.1-arabic.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language ar \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-ar \ + -index indexes/lucene-index.mrtydi-v1.1-arabic \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language ar \ >& logs/log.mrtydi-v1.1-ar & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-arabic.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ar.train.txt.gz \ - -output runs/run.mrtydi-v1.1-ar.bm25.topics.mrtydi-v1.1-ar.train.txt.gz \ - -language ar -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-arabic.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ar.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-ar.bm25.topics.mrtydi-v1.1-ar.dev.txt.gz \ - -language ar -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-arabic.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ar.test.txt.gz \ - -output runs/run.mrtydi-v1.1-ar.bm25.topics.mrtydi-v1.1-ar.test.txt.gz \ - -language ar -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-arabic \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ar.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ar.bm25.topics.mrtydi-v1.1-ar.train.txt.gz \ + -bm25 -hits 100 -language ar & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-arabic \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ar.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ar.bm25.topics.mrtydi-v1.1-ar.dev.txt.gz \ + -bm25 -hits 100 -language ar & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-arabic \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ar.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ar.bm25.topics.mrtydi-v1.1-ar.test.txt.gz \ + -bm25 -hits 100 -language ar & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-bn.md b/docs/regressions-mrtydi-v1.1-bn.md index db499b60e5..17c70a7563 100644 --- a/docs/regressions-mrtydi-v1.1-bn.md +++ b/docs/regressions-mrtydi-v1.1-bn.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-bn \ - -index indexes/lucene-index.mrtydi-v1.1-bengali.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language bn \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-bn \ + -index indexes/lucene-index.mrtydi-v1.1-bengali \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language bn \ >& logs/log.mrtydi-v1.1-bn & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-bengali.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-bn.train.txt.gz \ - -output runs/run.mrtydi-v1.1-bn.bm25.topics.mrtydi-v1.1-bn.train.txt.gz \ - -language bn -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-bengali.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-bn.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-bn.bm25.topics.mrtydi-v1.1-bn.dev.txt.gz \ - -language bn -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-bengali.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-bn.test.txt.gz \ - -output runs/run.mrtydi-v1.1-bn.bm25.topics.mrtydi-v1.1-bn.test.txt.gz \ - -language bn -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-bengali \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-bn.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-bn.bm25.topics.mrtydi-v1.1-bn.train.txt.gz \ + -bm25 -hits 100 -language bn & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-bengali \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-bn.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-bn.bm25.topics.mrtydi-v1.1-bn.dev.txt.gz \ + -bm25 -hits 100 -language bn & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-bengali \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-bn.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-bn.bm25.topics.mrtydi-v1.1-bn.test.txt.gz \ + -bm25 -hits 100 -language bn & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-en.md b/docs/regressions-mrtydi-v1.1-en.md index 19c5355603..223d854f07 100644 --- a/docs/regressions-mrtydi-v1.1-en.md +++ b/docs/regressions-mrtydi-v1.1-en.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-en \ - -index indexes/lucene-index.mrtydi-v1.1-english.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language en \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-en \ + -index indexes/lucene-index.mrtydi-v1.1-english \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language en \ >& logs/log.mrtydi-v1.1-en & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-english.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-en.train.txt.gz \ - -output runs/run.mrtydi-v1.1-en.bm25.topics.mrtydi-v1.1-en.train.txt.gz \ - -language en -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-english.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-en.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-en.bm25.topics.mrtydi-v1.1-en.dev.txt.gz \ - -language en -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-english.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-en.test.txt.gz \ - -output runs/run.mrtydi-v1.1-en.bm25.topics.mrtydi-v1.1-en.test.txt.gz \ - -language en -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-english \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-en.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-en.bm25.topics.mrtydi-v1.1-en.train.txt.gz \ + -bm25 -hits 100 -language en & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-english \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-en.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-en.bm25.topics.mrtydi-v1.1-en.dev.txt.gz \ + -bm25 -hits 100 -language en & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-english \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-en.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-en.bm25.topics.mrtydi-v1.1-en.test.txt.gz \ + -bm25 -hits 100 -language en & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-fi.md b/docs/regressions-mrtydi-v1.1-fi.md index 1427115e98..43f03f3739 100644 --- a/docs/regressions-mrtydi-v1.1-fi.md +++ b/docs/regressions-mrtydi-v1.1-fi.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-fi \ - -index indexes/lucene-index.mrtydi-v1.1-finnish.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language fi \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-fi \ + -index indexes/lucene-index.mrtydi-v1.1-finnish \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language fi \ >& logs/log.mrtydi-v1.1-fi & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-finnish.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-fi.train.txt.gz \ - -output runs/run.mrtydi-v1.1-fi.bm25.topics.mrtydi-v1.1-fi.train.txt.gz \ - -language fi -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-finnish.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-fi.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-fi.bm25.topics.mrtydi-v1.1-fi.dev.txt.gz \ - -language fi -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-finnish.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-fi.test.txt.gz \ - -output runs/run.mrtydi-v1.1-fi.bm25.topics.mrtydi-v1.1-fi.test.txt.gz \ - -language fi -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-finnish \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-fi.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-fi.bm25.topics.mrtydi-v1.1-fi.train.txt.gz \ + -bm25 -hits 100 -language fi & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-finnish \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-fi.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-fi.bm25.topics.mrtydi-v1.1-fi.dev.txt.gz \ + -bm25 -hits 100 -language fi & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-finnish \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-fi.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-fi.bm25.topics.mrtydi-v1.1-fi.test.txt.gz \ + -bm25 -hits 100 -language fi & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-id.md b/docs/regressions-mrtydi-v1.1-id.md index 25fb1e2113..81af9089cb 100644 --- a/docs/regressions-mrtydi-v1.1-id.md +++ b/docs/regressions-mrtydi-v1.1-id.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-id \ - -index indexes/lucene-index.mrtydi-v1.1-indonesian.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language id \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-id \ + -index indexes/lucene-index.mrtydi-v1.1-indonesian \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language id \ >& logs/log.mrtydi-v1.1-id & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-indonesian.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-id.train.txt.gz \ - -output runs/run.mrtydi-v1.1-id.bm25.topics.mrtydi-v1.1-id.train.txt.gz \ - -language id -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-indonesian.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-id.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-id.bm25.topics.mrtydi-v1.1-id.dev.txt.gz \ - -language id -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-indonesian.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-id.test.txt.gz \ - -output runs/run.mrtydi-v1.1-id.bm25.topics.mrtydi-v1.1-id.test.txt.gz \ - -language id -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-indonesian \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-id.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-id.bm25.topics.mrtydi-v1.1-id.train.txt.gz \ + -bm25 -hits 100 -language id & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-indonesian \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-id.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-id.bm25.topics.mrtydi-v1.1-id.dev.txt.gz \ + -bm25 -hits 100 -language id & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-indonesian \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-id.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-id.bm25.topics.mrtydi-v1.1-id.test.txt.gz \ + -bm25 -hits 100 -language id & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-ja.md b/docs/regressions-mrtydi-v1.1-ja.md index fd0aa6dcfd..86c230d8e0 100644 --- a/docs/regressions-mrtydi-v1.1-ja.md +++ b/docs/regressions-mrtydi-v1.1-ja.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-ja \ - -index indexes/lucene-index.mrtydi-v1.1-japanese.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language ja \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-ja \ + -index indexes/lucene-index.mrtydi-v1.1-japanese \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language ja \ >& logs/log.mrtydi-v1.1-ja & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-japanese.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ja.train.txt.gz \ - -output runs/run.mrtydi-v1.1-ja.bm25.topics.mrtydi-v1.1-ja.train.txt.gz \ - -language ja -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-japanese.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ja.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-ja.bm25.topics.mrtydi-v1.1-ja.dev.txt.gz \ - -language ja -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-japanese.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ja.test.txt.gz \ - -output runs/run.mrtydi-v1.1-ja.bm25.topics.mrtydi-v1.1-ja.test.txt.gz \ - -language ja -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-japanese \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ja.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ja.bm25.topics.mrtydi-v1.1-ja.train.txt.gz \ + -bm25 -hits 100 -language ja & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-japanese \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ja.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ja.bm25.topics.mrtydi-v1.1-ja.dev.txt.gz \ + -bm25 -hits 100 -language ja & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-japanese \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ja.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ja.bm25.topics.mrtydi-v1.1-ja.test.txt.gz \ + -bm25 -hits 100 -language ja & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-ko.md b/docs/regressions-mrtydi-v1.1-ko.md index 95b72c6cd4..0c844659cb 100644 --- a/docs/regressions-mrtydi-v1.1-ko.md +++ b/docs/regressions-mrtydi-v1.1-ko.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-ko \ - -index indexes/lucene-index.mrtydi-v1.1-korean.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language ko \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-ko \ + -index indexes/lucene-index.mrtydi-v1.1-korean \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language ko \ >& logs/log.mrtydi-v1.1-ko & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-korean.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ko.train.txt.gz \ - -output runs/run.mrtydi-v1.1-ko.bm25.topics.mrtydi-v1.1-ko.train.txt.gz \ - -language ko -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-korean.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ko.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-ko.bm25.topics.mrtydi-v1.1-ko.dev.txt.gz \ - -language ko -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-korean.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ko.test.txt.gz \ - -output runs/run.mrtydi-v1.1-ko.bm25.topics.mrtydi-v1.1-ko.test.txt.gz \ - -language ko -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-korean \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ko.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ko.bm25.topics.mrtydi-v1.1-ko.train.txt.gz \ + -bm25 -hits 100 -language ko & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-korean \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ko.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ko.bm25.topics.mrtydi-v1.1-ko.dev.txt.gz \ + -bm25 -hits 100 -language ko & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-korean \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ko.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ko.bm25.topics.mrtydi-v1.1-ko.test.txt.gz \ + -bm25 -hits 100 -language ko & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-ru.md b/docs/regressions-mrtydi-v1.1-ru.md index af4016ac2d..bfe696faea 100644 --- a/docs/regressions-mrtydi-v1.1-ru.md +++ b/docs/regressions-mrtydi-v1.1-ru.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-ru \ - -index indexes/lucene-index.mrtydi-v1.1-russian.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language ru \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-ru \ + -index indexes/lucene-index.mrtydi-v1.1-russian \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language ru \ >& logs/log.mrtydi-v1.1-ru & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-russian.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ru.train.txt.gz \ - -output runs/run.mrtydi-v1.1-ru.bm25.topics.mrtydi-v1.1-ru.train.txt.gz \ - -language ru -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-russian.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ru.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-ru.bm25.topics.mrtydi-v1.1-ru.dev.txt.gz \ - -language ru -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-russian.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ru.test.txt.gz \ - -output runs/run.mrtydi-v1.1-ru.bm25.topics.mrtydi-v1.1-ru.test.txt.gz \ - -language ru -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-russian \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ru.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ru.bm25.topics.mrtydi-v1.1-ru.train.txt.gz \ + -bm25 -hits 100 -language ru & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-russian \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ru.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ru.bm25.topics.mrtydi-v1.1-ru.dev.txt.gz \ + -bm25 -hits 100 -language ru & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-russian \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-ru.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-ru.bm25.topics.mrtydi-v1.1-ru.test.txt.gz \ + -bm25 -hits 100 -language ru & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-sw.md b/docs/regressions-mrtydi-v1.1-sw.md index cc0807823d..e1eb240e4a 100644 --- a/docs/regressions-mrtydi-v1.1-sw.md +++ b/docs/regressions-mrtydi-v1.1-sw.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-sw \ - -index indexes/lucene-index.mrtydi-v1.1-swahili.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-sw \ + -index indexes/lucene-index.mrtydi-v1.1-swahili \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -pretokenized \ >& logs/log.mrtydi-v1.1-sw & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-swahili.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-sw.train.txt.gz \ - -output runs/run.mrtydi-v1.1-sw.bm25.topics.mrtydi-v1.1-sw.train.txt.gz \ - -pretokenized -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-swahili.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-sw.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-sw.bm25.topics.mrtydi-v1.1-sw.dev.txt.gz \ - -pretokenized -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-swahili.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-sw.test.txt.gz \ - -output runs/run.mrtydi-v1.1-sw.bm25.topics.mrtydi-v1.1-sw.test.txt.gz \ - -pretokenized -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-swahili \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-sw.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-sw.bm25.topics.mrtydi-v1.1-sw.train.txt.gz \ + -bm25 -hits 100 -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-swahili \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-sw.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-sw.bm25.topics.mrtydi-v1.1-sw.dev.txt.gz \ + -bm25 -hits 100 -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-swahili \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-sw.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-sw.bm25.topics.mrtydi-v1.1-sw.test.txt.gz \ + -bm25 -hits 100 -pretokenized & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-te.md b/docs/regressions-mrtydi-v1.1-te.md index bd56bfa7d9..1a797d960a 100644 --- a/docs/regressions-mrtydi-v1.1-te.md +++ b/docs/regressions-mrtydi-v1.1-te.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-te \ - -index indexes/lucene-index.mrtydi-v1.1-telugu.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-te \ + -index indexes/lucene-index.mrtydi-v1.1-telugu \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -pretokenized \ >& logs/log.mrtydi-v1.1-te & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-telugu.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-te.train.txt.gz \ - -output runs/run.mrtydi-v1.1-te.bm25.topics.mrtydi-v1.1-te.train.txt.gz \ - -pretokenized -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-telugu.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-te.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-te.bm25.topics.mrtydi-v1.1-te.dev.txt.gz \ - -pretokenized -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-telugu.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-te.test.txt.gz \ - -output runs/run.mrtydi-v1.1-te.bm25.topics.mrtydi-v1.1-te.test.txt.gz \ - -pretokenized -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-telugu \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-te.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-te.bm25.topics.mrtydi-v1.1-te.train.txt.gz \ + -bm25 -hits 100 -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-telugu \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-te.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-te.bm25.topics.mrtydi-v1.1-te.dev.txt.gz \ + -bm25 -hits 100 -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-telugu \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-te.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-te.bm25.topics.mrtydi-v1.1-te.test.txt.gz \ + -bm25 -hits 100 -pretokenized & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-mrtydi-v1.1-th.md b/docs/regressions-mrtydi-v1.1-th.md index 617a716c86..d488965c27 100644 --- a/docs/regressions-mrtydi-v1.1-th.md +++ b/docs/regressions-mrtydi-v1.1-th.md @@ -10,11 +10,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ - -input /path/to/mrtydi-v1.1-th \ - -index indexes/lucene-index.mrtydi-v1.1-thai.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw -language th \ +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input /path/to/mrtydi-v1.1-th \ + -index indexes/lucene-index.mrtydi-v1.1-thai \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw -language th \ >& logs/log.mrtydi-v1.1-th & ``` @@ -26,18 +27,21 @@ For additional details, see explanation of [common indexing options](common-inde After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-thai.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-th.train.txt.gz \ - -output runs/run.mrtydi-v1.1-th.bm25.topics.mrtydi-v1.1-th.train.txt.gz \ - -language th -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-thai.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-th.dev.txt.gz \ - -output runs/run.mrtydi-v1.1-th.bm25.topics.mrtydi-v1.1-th.dev.txt.gz \ - -language th -bm25 -hits 100 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.mrtydi-v1.1-thai.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-th.test.txt.gz \ - -output runs/run.mrtydi-v1.1-th.bm25.topics.mrtydi-v1.1-th.test.txt.gz \ - -language th -bm25 -hits 100 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-thai \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-th.train.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-th.bm25.topics.mrtydi-v1.1-th.train.txt.gz \ + -bm25 -hits 100 -language th & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-thai \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-th.dev.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-th.bm25.topics.mrtydi-v1.1-th.dev.txt.gz \ + -bm25 -hits 100 -language th & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.mrtydi-v1.1-thai \ + -topics src/main/resources/topics-and-qrels/topics.mrtydi-v1.1-th.test.txt.gz -topicreader TsvInt \ + -output runs/run.mrtydi-v1.1-th.bm25.topics.mrtydi-v1.1-th.test.txt.gz \ + -bm25 -hits 100 -language th & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc-docTTTTTquery-per-doc.md b/docs/regressions-msmarco-doc-docTTTTTquery-per-doc.md index eaf7149a4d..102ce83759 100644 --- a/docs/regressions-msmarco-doc-docTTTTTquery-per-doc.md +++ b/docs/regressions-msmarco-doc-docTTTTTquery-per-doc.md @@ -16,11 +16,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-doc \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-doc \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-doc & ``` @@ -36,15 +37,17 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 4.68 -bm25.b 0.87 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-doc.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 4.68 -bm25.b 0.87 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc-docTTTTTquery-per-passage-v3.md b/docs/regressions-msmarco-doc-docTTTTTquery-per-passage-v3.md index 235aa82e0d..02db374d6e 100644 --- a/docs/regressions-msmarco-doc-docTTTTTquery-per-passage-v3.md +++ b/docs/regressions-msmarco-doc-docTTTTTquery-per-passage-v3.md @@ -19,11 +19,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-passage-v3 \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-passage-v3 \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-passage-v3 & ``` @@ -39,15 +40,17 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage-v3.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage-v3.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage-v3.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage-v3.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc-docTTTTTquery-per-passage.md b/docs/regressions-msmarco-doc-docTTTTTquery-per-passage.md index 7738979ec8..860aabbc19 100644 --- a/docs/regressions-msmarco-doc-docTTTTTquery-per-passage.md +++ b/docs/regressions-msmarco-doc-docTTTTTquery-per-passage.md @@ -19,11 +19,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-docTTTTTquery-per-passage \ - -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-docTTTTTquery-per-passage \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-docTTTTTquery-per-passage & ``` @@ -39,15 +40,17 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-docTTTTTquery-per-passage.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc-per-passage-v2.md b/docs/regressions-msmarco-doc-per-passage-v2.md index d1e111c181..a798f521cd 100644 --- a/docs/regressions-msmarco-doc-per-passage-v2.md +++ b/docs/regressions-msmarco-doc-per-passage-v2.md @@ -19,11 +19,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-per-passage-v2 \ - -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-per-passage-v2 \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-per-passage-v2 & ``` @@ -39,45 +40,53 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-default+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-default+ax.topics.msmarco-doc.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-default+prf.topics.msmarco-doc.dev.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned+ax.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned+prf.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-default+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-default+ax.topics.msmarco-doc.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-default+prf.topics.msmarco-doc.dev.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned+ax.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v2 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v2.bm25-tuned+prf.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc-per-passage-v3.md b/docs/regressions-msmarco-doc-per-passage-v3.md index 9403836b6a..a4c8ec5259 100644 --- a/docs/regressions-msmarco-doc-per-passage-v3.md +++ b/docs/regressions-msmarco-doc-per-passage-v3.md @@ -19,11 +19,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-per-passage-v3 \ - -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-per-passage-v3 \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-per-passage-v3 & ``` @@ -39,45 +40,53 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-default+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-default+ax.topics.msmarco-doc.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-default+prf.topics.msmarco-doc.dev.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned+ax.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned+prf.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-default+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-default+ax.topics.msmarco-doc.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-default+prf.topics.msmarco-doc.dev.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned+ax.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage-v3 \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage-v3.bm25-tuned+prf.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc-per-passage.md b/docs/regressions-msmarco-doc-per-passage.md index 7d702399c3..6e31874cef 100644 --- a/docs/regressions-msmarco-doc-per-passage.md +++ b/docs/regressions-msmarco-doc-per-passage.md @@ -19,11 +19,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-doc-per-passage \ - -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-doc-per-passage \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc-per-passage & ``` @@ -39,45 +40,53 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+ax.topics.msmarco-doc.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-default+prf.topics.msmarco-doc.dev.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+ax.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc-per-passage.bm25-tuned+prf.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+ax.topics.msmarco-doc.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-default+prf.topics.msmarco-doc.dev.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+ax.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc-per-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc-per-passage.bm25-tuned+prf.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-doc.md b/docs/regressions-msmarco-doc.md index c27e385148..e157d6d213 100644 --- a/docs/regressions-msmarco-doc.md +++ b/docs/regressions-msmarco-doc.md @@ -16,11 +16,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/msmarco-doc \ - -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 1 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/msmarco-doc \ + -index indexes/lucene-index.msmarco-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 1 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-doc & ``` @@ -36,35 +37,41 @@ The regression experiments here evaluate on the 5193 dev set questions. After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc.bm25-default+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc.bm25-tuned.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc.bm25-tuned2.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 4.46 -bm25.b 0.82 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ - -output runs/run.msmarco-doc.bm25-tuned2+rm3.topics.msmarco-doc.dev.txt \ - -bm25 -bm25.k1 4.46 -bm25.b 0.82 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-default+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned2.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 4.46 -bm25.b 0.82 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-doc.bm25-tuned2+rm3.topics.msmarco-doc.dev.txt \ + -bm25 -bm25.k1 4.46 -bm25.b 0.82 -rm3 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage-deepimpact.md b/docs/regressions-msmarco-passage-deepimpact.md index d143a0bdb2..0f7008c4e7 100644 --- a/docs/regressions-msmarco-passage-deepimpact.md +++ b/docs/regressions-msmarco-passage-deepimpact.md @@ -15,11 +15,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-passage-deepimpact \ - -index indexes/lucene-index.msmarco-passage-deepimpact \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-passage-deepimpact \ + -index indexes/lucene-index.msmarco-passage-deepimpact \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized \ >& logs/log.msmarco-passage-deepimpact & ``` @@ -36,10 +37,11 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-deepimpact \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.deepimpact.tsv.gz \ - -output runs/run.msmarco-passage-deepimpact.deepimpact.topics.msmarco-passage.dev-subset.deepimpact.tsv.gz \ - -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-deepimpact \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.deepimpact.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-passage-deepimpact.deepimpact.topics.msmarco-passage.dev-subset.deepimpact.tsv.gz \ + -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage-distill-splade-max.md b/docs/regressions-msmarco-passage-distill-splade-max.md index 6953a4ad06..36ecaeecc2 100644 --- a/docs/regressions-msmarco-passage-distill-splade-max.md +++ b/docs/regressions-msmarco-passage-distill-splade-max.md @@ -15,11 +15,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-passage-distill-splade-max \ - -index indexes/lucene-index.msmarco-passage-distill-splade-max \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-passage-distill-splade-max \ + -index indexes/lucene-index.msmarco-passage-distill-splade-max \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized \ >& logs/log.msmarco-passage-distill-splade-max & ``` @@ -36,10 +37,11 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-distill-splade-max \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.distill-splade-max.tsv.gz \ - -output runs/run.msmarco-passage-distill-splade-max.distill-splade-max.topics.msmarco-passage.dev-subset.distill-splade-max.tsv.gz \ - -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-distill-splade-max \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.distill-splade-max.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-passage-distill-splade-max.distill-splade-max.topics.msmarco-passage.dev-subset.distill-splade-max.tsv.gz \ + -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage-doc2query.md b/docs/regressions-msmarco-passage-doc2query.md index c303a1e4c3..76c39f0842 100644 --- a/docs/regressions-msmarco-passage-doc2query.md +++ b/docs/regressions-msmarco-passage-doc2query.md @@ -15,11 +15,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage-doc2query \ - -index indexes/lucene-index.msmarco-passage-doc2query.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage-doc2query \ + -index indexes/lucene-index.msmarco-passage-doc2query \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage-doc2query & ``` @@ -36,25 +37,29 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-doc2query.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-doc2query.bm25-default.topics.msmarco-passage.dev-subset.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-doc2query.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-doc2query.bm25-default+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-doc2query.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-doc2query.bm25-tuned.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-doc2query.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-doc2query.bm25-tuned+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-doc2query.bm25-default.topics.msmarco-passage.dev-subset.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-doc2query.bm25-default+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-doc2query.bm25-tuned.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-doc2query \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-doc2query.bm25-tuned+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage-docTTTTTquery.md b/docs/regressions-msmarco-passage-docTTTTTquery.md index 19c8ed4e6f..162b772ef8 100644 --- a/docs/regressions-msmarco-passage-docTTTTTquery.md +++ b/docs/regressions-msmarco-passage-docTTTTTquery.md @@ -14,11 +14,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage-docTTTTTquery \ - -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage-docTTTTTquery \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage-docTTTTTquery & ``` @@ -35,35 +36,41 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-default.topics.msmarco-passage.dev-subset.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-default+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 2.18 -bm25.b 0.86 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-default.topics.msmarco-passage.dev-subset.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-default+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 2.18 -bm25.b 0.86 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-docTTTTTquery \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage-docTTTTTquery.bm25-tuned2+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage-unicoil-tilde-expansion.md b/docs/regressions-msmarco-passage-unicoil-tilde-expansion.md index 1152f77c73..a323047c50 100644 --- a/docs/regressions-msmarco-passage-unicoil-tilde-expansion.md +++ b/docs/regressions-msmarco-passage-unicoil-tilde-expansion.md @@ -15,11 +15,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-passage-unicoil-tilde-expansion \ - -index indexes/lucene-index.msmarco-passage-unicoil-tilde-expansion \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-passage-unicoil-tilde-expansion \ + -index indexes/lucene-index.msmarco-passage-unicoil-tilde-expansion \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized \ >& logs/log.msmarco-passage-unicoil-tilde-expansion & ``` @@ -36,10 +37,11 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-unicoil-tilde-expansion \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.unicoil-tilde-expansion.tsv.gz \ - -output runs/run.msmarco-passage-unicoil-tilde-expansion.unicoil-tilde-expansion.topics.msmarco-passage.dev-subset.unicoil-tilde-expansion.tsv.gz \ - -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-unicoil-tilde-expansion \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.unicoil-tilde-expansion.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-passage-unicoil-tilde-expansion.unicoil-tilde-expansion.topics.msmarco-passage.dev-subset.unicoil-tilde-expansion.tsv.gz \ + -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage-unicoil.md b/docs/regressions-msmarco-passage-unicoil.md index d3b28adcdf..4c4510b648 100644 --- a/docs/regressions-msmarco-passage-unicoil.md +++ b/docs/regressions-msmarco-passage-unicoil.md @@ -15,11 +15,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-passage-unicoil \ - -index indexes/lucene-index.msmarco-passage-unicoil \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-passage-unicoil \ + -index indexes/lucene-index.msmarco-passage-unicoil \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized \ >& logs/log.msmarco-passage-unicoil & ``` @@ -36,10 +37,11 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage-unicoil \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.unicoil.tsv.gz \ - -output runs/run.msmarco-passage-unicoil.unicoil.topics.msmarco-passage.dev-subset.unicoil.tsv.gz \ - -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage-unicoil \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.unicoil.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-passage-unicoil.unicoil.topics.msmarco-passage.dev-subset.unicoil.tsv.gz \ + -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-passage.md b/docs/regressions-msmarco-passage.md index e13e000526..9326eca732 100644 --- a/docs/regressions-msmarco-passage.md +++ b/docs/regressions-msmarco-passage.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonCollection \ - -input /path/to/msmarco-passage \ - -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 9 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection JsonCollection \ + -input /path/to/msmarco-passage \ + -index indexes/lucene-index.msmarco-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 9 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-passage & ``` @@ -32,45 +33,53 @@ The regression experiments here evaluate on the 6980 dev set questions; see [thi After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-default+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-default+ax.topics.msmarco-passage.dev-subset.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-default+prf.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25prf & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-tuned.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-tuned+rm3.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-tuned+ax.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ - -output runs/run.msmarco-passage.bm25-tuned+prf.topics.msmarco-passage.dev-subset.txt \ - -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+ax.topics.msmarco-passage.dev-subset.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-default+prf.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25prf & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+rm3.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+ax.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-passage.dev-subset.txt -topicreader TsvInt \ + -output runs/run.msmarco-passage.bm25-tuned+prf.topics.msmarco-passage.dev-subset.txt \ + -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md b/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md index 06b591b357..3da1c35b49 100644 --- a/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md +++ b/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized \ >& logs/log.msmarco-v2-doc-segmented-unicoil-noexp-0shot & ``` @@ -30,14 +31,16 @@ These regression experiments use the [dev queries](../src/main/resources/topics- After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz \ - -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.unicoil-noexp.0shot.tsv.gz \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev2.unicoil-noexp.0shot.tsv.gz \ - -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz \ + -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.unicoil-noexp.0shot.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev2.unicoil-noexp.0shot.tsv.gz \ + -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: @@ -55,7 +58,7 @@ tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank sr With the above commands, you should be able to reproduce the following results: -MAP | uniCOIL (no expansion, zero-shot)| +MAP@100 | uniCOIL (no expansion, zero-shot)| :---------------------------------------|-----------| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.2050 | [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.2082 | diff --git a/docs/regressions-msmarco-v2-doc-segmented.md b/docs/regressions-msmarco-v2-doc-segmented.md index 13c3a742b7..01b4e60d1e 100644 --- a/docs/regressions-msmarco-v2-doc-segmented.md +++ b/docs/regressions-msmarco-v2-doc-segmented.md @@ -12,11 +12,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ - -input /path/to/msmarco-v2-doc-segmented \ - -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2DocCollection \ + -input /path/to/msmarco-v2-doc-segmented \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-doc-segmented & ``` @@ -33,41 +34,49 @@ These regression experiments use the [dev queries](../src/main/resources/topics- After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default.topics.msmarco-v2-doc.dev.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+rm3.topics.msmarco-v2-doc.dev.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+rm3.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+ax.topics.msmarco-v2-doc.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+ax.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+prf.topics.msmarco-v2-doc.dev.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc-segmented.bm25-default+prf.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default.topics.msmarco-v2-doc.dev.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+rm3.topics.msmarco-v2-doc.dev.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+rm3.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+ax.topics.msmarco-v2-doc.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+ax.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+prf.topics.msmarco-v2-doc.dev.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc-segmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc-segmented.bm25-default+prf.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: @@ -106,7 +115,7 @@ tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank sr With the above commands, you should be able to reproduce the following results: -MAP | BM25 (default)| +RM3 | +Ax | +PRF | +MAP@100 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.1875 | 0.1648 | 0.1344 | 0.1528 | [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.1903 | 0.1679 | 0.1345 | 0.1565 | diff --git a/docs/regressions-msmarco-v2-doc.md b/docs/regressions-msmarco-v2-doc.md index effebda917..8ae40241af 100644 --- a/docs/regressions-msmarco-v2-doc.md +++ b/docs/regressions-msmarco-v2-doc.md @@ -12,11 +12,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ - -input /path/to/msmarco-v2-doc \ - -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2DocCollection \ + -input /path/to/msmarco-v2-doc \ + -index indexes/lucene-index.msmarco-v2-doc \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-doc & ``` @@ -33,41 +34,49 @@ These regression experiments use the [dev queries](../src/main/resources/topics- After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc.bm25-default.topics.msmarco-v2-doc.dev.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc.bm25-default.topics.msmarco-v2-doc.dev2.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+rm3.topics.msmarco-v2-doc.dev.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+rm3.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+ax.topics.msmarco-v2-doc.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+ax.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+prf.topics.msmarco-v2-doc.dev.txt \ - -bm25 -bm25prf & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ - -output runs/run.msmarco-v2-doc.bm25-default+prf.topics.msmarco-v2-doc.dev2.txt \ - -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default.topics.msmarco-v2-doc.dev.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default.topics.msmarco-v2-doc.dev2.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+rm3.topics.msmarco-v2-doc.dev.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+rm3.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+ax.topics.msmarco-v2-doc.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+ax.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+prf.topics.msmarco-v2-doc.dev.txt \ + -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-doc \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-doc.bm25-default+prf.topics.msmarco-v2-doc.dev2.txt \ + -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -106,7 +115,7 @@ tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank sr With the above commands, you should be able to reproduce the following results: -MAP | BM25 (default)| +RM3 | +Ax | +PRF | +MAP@100 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.1552 | 0.0966 | 0.0665 | 0.0834 | [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.1639 | 0.1011 | 0.0722 | 0.0876 | diff --git a/docs/regressions-msmarco-v2-passage-augmented.md b/docs/regressions-msmarco-v2-passage-augmented.md index 93b9c39353..bb3c586fe1 100644 --- a/docs/regressions-msmarco-v2-passage-augmented.md +++ b/docs/regressions-msmarco-v2-passage-augmented.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ - -input /path/to/msmarco-v2-passage-augmented \ - -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2PassageCollection \ + -input /path/to/msmarco-v2-passage-augmented \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-passage-augmented & ``` @@ -32,41 +33,49 @@ These regression experiments use the [dev queries](../src/main/resources/topics- After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default.topics.msmarco-v2-passage.dev.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default.topics.msmarco-v2-passage.dev2.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+rm3.topics.msmarco-v2-passage.dev.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+rm3.topics.msmarco-v2-passage.dev2.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+ax.topics.msmarco-v2-passage.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+ax.topics.msmarco-v2-passage.dev2.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+prf.topics.msmarco-v2-passage.dev.txt \ - -bm25 -bm25prf & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage-augmented.bm25-default+prf.topics.msmarco-v2-passage.dev2.txt \ - -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default.topics.msmarco-v2-passage.dev.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default.topics.msmarco-v2-passage.dev2.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+rm3.topics.msmarco-v2-passage.dev.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+rm3.topics.msmarco-v2-passage.dev2.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+ax.topics.msmarco-v2-passage.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+ax.topics.msmarco-v2-passage.dev2.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+prf.topics.msmarco-v2-passage.dev.txt \ + -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-augmented \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-augmented.bm25-default+prf.topics.msmarco-v2-passage.dev2.txt \ + -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -105,7 +114,7 @@ tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank sr With the above commands, you should be able to reproduce the following results: -MAP | BM25 (default)| +RM3 | +Ax | +PRF | +MAP@100 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.0863 | 0.0669 | 0.0431 | 0.0542 | [MS MARCO V2 Passage: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.0904 | 0.0692 | 0.0442 | 0.0563 | diff --git a/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md b/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md index 1949200103..17764a47c0 100644 --- a/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md +++ b/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection JsonVectorCollection \ - -input /path/to/msmarco-v2-passage-unicoil-noexp-0shot \ - -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -impact -pretokenized \ +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2-passage-unicoil-noexp-0shot \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized \ >& logs/log.msmarco-v2-passage-unicoil-noexp-0shot & ``` @@ -30,14 +31,16 @@ These regression experiments use the [dev queries](../src/main/resources/topics- After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.tsv.gz \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.tsv.gz \ - -impact -pretokenized & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.unicoil-noexp.0shot.tsv.gz \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev2.unicoil-noexp.0shot.tsv.gz \ - -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.tsv.gz \ + -impact -pretokenized & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.unicoil-noexp.0shot.tsv.gz -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev2.unicoil-noexp.0shot.tsv.gz \ + -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: @@ -55,7 +58,7 @@ tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank sr With the above commands, you should be able to reproduce the following results: -MAP | uniCOIL (no expansion, zero-shot)| +MAP@100 | uniCOIL (no expansion, zero-shot)| :---------------------------------------|-----------| [MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.1333 | [MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.1374 | diff --git a/docs/regressions-msmarco-v2-passage.md b/docs/regressions-msmarco-v2-passage.md index 2050ee04d3..1150be220d 100644 --- a/docs/regressions-msmarco-v2-passage.md +++ b/docs/regressions-msmarco-v2-passage.md @@ -12,11 +12,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ - -input /path/to/msmarco-v2-passage \ - -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 18 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection MsMarcoV2PassageCollection \ + -input /path/to/msmarco-v2-passage \ + -index indexes/lucene-index.msmarco-v2-passage \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -storePositions -storeDocvectors -storeRaw \ >& logs/log.msmarco-v2-passage & ``` @@ -33,41 +34,49 @@ These regression experiments use the [dev queries](../src/main/resources/topics- After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage.bm25-default.topics.msmarco-v2-passage.dev.txt \ - -bm25 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage.bm25-default.topics.msmarco-v2-passage.dev2.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+rm3.topics.msmarco-v2-passage.dev.txt \ - -bm25 -rm3 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+rm3.topics.msmarco-v2-passage.dev2.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+ax.topics.msmarco-v2-passage.dev.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+ax.topics.msmarco-v2-passage.dev2.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+prf.topics.msmarco-v2-passage.dev.txt \ - -bm25 -bm25prf & -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw \ - -topicreader TsvInt -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt \ - -output runs/run.msmarco-v2-passage.bm25-default+prf.topics.msmarco-v2-passage.dev2.txt \ - -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default.topics.msmarco-v2-passage.dev.txt \ + -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default.topics.msmarco-v2-passage.dev2.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+rm3.topics.msmarco-v2-passage.dev.txt \ + -bm25 -rm3 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+rm3.topics.msmarco-v2-passage.dev2.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+ax.topics.msmarco-v2-passage.dev.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+ax.topics.msmarco-v2-passage.dev2.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+prf.topics.msmarco-v2-passage.dev.txt \ + -bm25 -bm25prf & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.msmarco-v2-passage \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev2.txt -topicreader TsvInt \ + -output runs/run.msmarco-v2-passage.bm25-default+prf.topics.msmarco-v2-passage.dev2.txt \ + -bm25 -bm25prf & ``` Evaluation can be performed using `trec_eval`: @@ -106,7 +115,7 @@ tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank sr With the above commands, you should be able to reproduce the following results: -MAP | BM25 (default)| +RM3 | +Ax | +PRF | +MAP@100 | BM25 (default)| +RM3 | +Ax | +PRF | :---------------------------------------|-----------|-----------|-----------|-----------| [MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.0709 | 0.0611 | 0.0592 | 0.0595 | [MS MARCO V2 Passage: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)| 0.0794 | 0.0647 | 0.0642 | 0.0632 | diff --git a/docs/regressions-ntcir8-zh.md b/docs/regressions-ntcir8-zh.md index 87da8f551e..7fdb6c5e19 100644 --- a/docs/regressions-ntcir8-zh.md +++ b/docs/regressions-ntcir8-zh.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/ntcir8-zh \ - -index indexes/lucene-index.ntcir8-zh.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw -language zh -uniqueDocid -optimize \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/ntcir8-zh \ + -index indexes/lucene-index.ntcir8-zh \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language zh -uniqueDocid -optimize \ >& logs/log.ntcir8-zh & ``` @@ -36,10 +37,11 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.ntcir8-zh.pos+docvectors+raw \ - -topicreader TsvString -topics src/main/resources/topics-and-qrels/topics.ntcir8zh.eval.txt \ - -output runs/run.ntcir8-zh.bm25.topics.ntcir8zh.eval.txt \ - -language zh -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.ntcir8-zh \ + -topics src/main/resources/topics-and-qrels/topics.ntcir8zh.eval.txt -topicreader TsvString \ + -output runs/run.ntcir8-zh.bm25.topics.ntcir8zh.eval.txt \ + -bm25 -language zh & ``` Evaluation can be performed using `trec_eval`: @@ -62,6 +64,6 @@ P20 | BM25 | [NTCIR-8 ACLIA (IR4QA subtask, Monolingual Chinese)](../src/main/resources/topics-and-qrels/topics.ntcir8zh.eval.txt)| 0.3849 | -NDCG20 | BM25 | +nDCG@20 | BM25 | :---------------------------------------|-----------| [NTCIR-8 ACLIA (IR4QA subtask, Monolingual Chinese)](../src/main/resources/topics-and-qrels/topics.ntcir8zh.eval.txt)| 0.4757 | diff --git a/docs/regressions-robust05.md b/docs/regressions-robust05.md index 985d677fc8..1d22933aa4 100644 --- a/docs/regressions-robust05.md +++ b/docs/regressions-robust05.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TrecCollection \ - -input /path/to/robust05 \ - -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection TrecCollection \ + -input /path/to/robust05 \ + -index indexes/lucene-index.robust05 \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.robust05 & ``` @@ -31,35 +32,41 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust05.txt \ - -output runs/run.robust05.bm25.topics.robust05.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust05.txt \ - -output runs/run.robust05.bm25+rm3.topics.robust05.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust05.txt \ - -output runs/run.robust05.bm25+ax.topics.robust05.txt \ - -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust05.txt \ - -output runs/run.robust05.ql.topics.robust05.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust05.txt \ - -output runs/run.robust05.ql+rm3.topics.robust05.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.robust05.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.robust05.txt \ - -output runs/run.robust05.ql+ax.topics.robust05.txt \ - -qld -axiom -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.robust05 \ + -topics src/main/resources/topics-and-qrels/topics.robust05.txt -topicreader Trec \ + -output runs/run.robust05.bm25.topics.robust05.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.robust05 \ + -topics src/main/resources/topics-and-qrels/topics.robust05.txt -topicreader Trec \ + -output runs/run.robust05.bm25+rm3.topics.robust05.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.robust05 \ + -topics src/main/resources/topics-and-qrels/topics.robust05.txt -topicreader Trec \ + -output runs/run.robust05.bm25+ax.topics.robust05.txt \ + -bm25 -axiom -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.robust05 \ + -topics src/main/resources/topics-and-qrels/topics.robust05.txt -topicreader Trec \ + -output runs/run.robust05.ql.topics.robust05.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.robust05 \ + -topics src/main/resources/topics-and-qrels/topics.robust05.txt -topicreader Trec \ + -output runs/run.robust05.ql+rm3.topics.robust05.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.robust05 \ + -topics src/main/resources/topics-and-qrels/topics.robust05.txt -topicreader Trec \ + -output runs/run.robust05.ql+ax.topics.robust05.txt \ + -qld -axiom -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/docs/regressions-trec02-ar.md b/docs/regressions-trec02-ar.md index 469c26b95e..161bf4a766 100644 --- a/docs/regressions-trec02-ar.md +++ b/docs/regressions-trec02-ar.md @@ -11,11 +11,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ - -input /path/to/trec02-ar \ - -index indexes/lucene-index.trec02-ar.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw -language ar \ +target/appassembler/bin/IndexCollection \ + -collection CleanTrecCollection \ + -input /path/to/trec02-ar \ + -index indexes/lucene-index.trec02-ar \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language ar \ >& logs/log.trec02-ar & ``` @@ -36,10 +37,11 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.trec02-ar.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.trec02ar-ar.txt \ - -output runs/run.trec02-ar.bm25.topics.trec02ar-ar.txt \ - -language ar -bm25 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.trec02-ar \ + -topics src/main/resources/topics-and-qrels/topics.trec02ar-ar.txt -topicreader Trec \ + -output runs/run.trec02-ar.bm25.topics.trec02ar-ar.txt \ + -bm25 -language ar & ``` Evaluation can be performed using `trec_eval`: @@ -62,6 +64,6 @@ P20 | BM25 | [TREC 2002 (Monolingual Arabic)](../src/main/resources/topics-and-qrels/topics.trec02ar-ar.txt)| 0.3610 | -NDCG20 | BM25 | +nDCG@20 | BM25 | :---------------------------------------|-----------| [TREC 2002 (Monolingual Arabic)](../src/main/resources/topics-and-qrels/topics.trec02ar-ar.txt)| 0.4056 | diff --git a/docs/regressions-wt10g.md b/docs/regressions-wt10g.md index b8969a9e47..f4b20896be 100644 --- a/docs/regressions-wt10g.md +++ b/docs/regressions-wt10g.md @@ -9,11 +9,12 @@ Note that this page is automatically generated from [this template](../src/main/ Typical indexing command: ``` -nohup sh target/appassembler/bin/IndexCollection -collection TrecwebCollection \ - -input /path/to/wt10g \ - -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -generator DefaultLuceneDocumentGenerator \ - -threads 16 -storePositions -storeDocvectors -storeRaw \ +target/appassembler/bin/IndexCollection \ + -collection TrecwebCollection \ + -input /path/to/wt10g \ + -index indexes/lucene-index.wt10g \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw \ >& logs/log.wt10g & ``` @@ -31,35 +32,41 @@ Topics and qrels are stored in [`src/main/resources/topics-and-qrels/`](../src/m After indexing has completed, you should be able to perform retrieval as follows: ``` -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt \ - -output runs/run.wt10g.bm25.topics.adhoc.451-550.txt \ - -bm25 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt \ - -output runs/run.wt10g.bm25+rm3.topics.adhoc.451-550.txt \ - -bm25 -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt \ - -output runs/run.wt10g.bm25+ax.topics.adhoc.451-550.txt \ - -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt \ - -output runs/run.wt10g.ql.topics.adhoc.451-550.txt \ - -qld & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt \ - -output runs/run.wt10g.ql+rm3.topics.adhoc.451-550.txt \ - -qld -rm3 & - -nohup target/appassembler/bin/SearchCollection -index indexes/lucene-index.wt10g.pos+docvectors+raw \ - -topicreader Trec -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt \ - -output runs/run.wt10g.ql+ax.topics.adhoc.451-550.txt \ - -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wt10g \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt -topicreader Trec \ + -output runs/run.wt10g.bm25.topics.adhoc.451-550.txt \ + -bm25 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wt10g \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt -topicreader Trec \ + -output runs/run.wt10g.bm25+rm3.topics.adhoc.451-550.txt \ + -bm25 -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wt10g \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt -topicreader Trec \ + -output runs/run.wt10g.bm25+ax.topics.adhoc.451-550.txt \ + -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wt10g \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt -topicreader Trec \ + -output runs/run.wt10g.ql.topics.adhoc.451-550.txt \ + -qld & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wt10g \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt -topicreader Trec \ + -output runs/run.wt10g.ql+rm3.topics.adhoc.451-550.txt \ + -qld -rm3 & + +target/appassembler/bin/SearchCollection \ + -index indexes/lucene-index.wt10g \ + -topics src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt -topicreader Trec \ + -output runs/run.wt10g.ql+ax.topics.adhoc.451-550.txt \ + -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 & ``` Evaluation can be performed using `trec_eval`: diff --git a/src/main/python/run_regression.py b/src/main/python/run_regression.py index d790303389..afaf6f55b3 100644 --- a/src/main/python/run_regression.py +++ b/src/main/python/run_regression.py @@ -47,7 +47,7 @@ INDEX_COMMAND = 'target/appassembler/bin/IndexCollection' INDEX_STATS_COMMAND = 'target/appassembler/bin/IndexReaderUtils' - +SEARCH_COMMAND = 'target/appassembler/bin/SearchCollection' def is_close(a, b, rel_tol=1e-09, abs_tol=0.0): return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) @@ -84,7 +84,7 @@ def construct_indexing_command(yaml_data, args): Returns: (:obj:`list` of :obj:`str`): the command as a list that can be executed by calling subprocess.call(command) """ - logger.info('='*10+'Indexing'+'='*10) + logger.info('='*10 + ' Indexing ' + '='*10) # Determine the input collection path, either from the command line, # or by checking various locations specified in the YAML. @@ -131,9 +131,9 @@ def verify_index(yaml_data, build_index=True, dry_run=False): Args: yaml_data (dict): the yaml config """ - logger.info('='*10+'Verifying Index'+'='*10) + logger.info('='*10 + ' Verifying Index ' + '='*10) index_path = get_index_path(yaml_data) - logger.info('[Index]: ' + index_path) + logger.info('index: ' + index_path) index_utils_command = [ os.path.join(yaml_data['root'], INDEX_STATS_COMMAND), '-index', index_path, '-stats' @@ -150,7 +150,7 @@ def verify_index(yaml_data, build_index=True, dry_run=False): print('{}: expected={}, actual={}'.format(stat, yaml_data['index_stats'][stat], value)) assert value == yaml_data['index_stats'][stat] logger.info(line) - logger.info('='*10+'Verifying Index Succeed'+'='*10) + logger.info('Index statistics successfully verified!') def generate_run_file_name(corpus, topic, model_name): @@ -183,13 +183,13 @@ def construct_ranking_command(output_root, yaml_data, build_index=True): """ ranking_commands = [ [ - os.path.join(yaml_data['root'], yaml_data['search_command']), - '-topicreader', yaml_data['topic_reader'], + SEARCH_COMMAND, '-index', get_index_path(yaml_data), - ' '.join(model['params']), '-topics', os.path.join(yaml_data['root'], yaml_data['topic_root'], topic['path']), + '-topicreader', yaml_data['topic_reader'], '-output', os.path.join(output_root, generate_run_file_name(yaml_data['corpus'], topic, model['name'])), - ] + (yaml_data['search_options'] if 'search_options' in yaml_data else []) + model['params'] + ] for (model, topic) in list(itertools.product(yaml_data['models'], yaml_data['topics'])) ] return ranking_commands @@ -205,11 +205,11 @@ def evaluate_and_verify(output_root, yaml_data, fail_eval, dry_run): """ fail_str = '\033[91m[FAIL]\033[0m ' ok_str = ' [OK] ' - logger.info('='*10+'Verifying Results'+'='*10) + logger.info('='*10 + ' Verifying Results: ' + yaml_data['corpus'] + ' ' + '='*10) success = True for model in yaml_data['models']: for i, topic in enumerate(yaml_data['topics']): - for eval in yaml_data['evals']: + for eval in yaml_data['metrics']: eval_cmd = [ os.path.join(yaml_data['root'], eval['command']), eval['params'] if 'params' in eval and eval['params'] else '', os.path.join(yaml_data['root'], yaml_data['qrels_root'], topic['qrel']), @@ -225,7 +225,7 @@ def evaluate_and_verify(output_root, yaml_data, fail_eval, dry_run): eval_out = out.strip().split(eval['separator'])[eval['parse_index']] expected = round(model['results'][eval['metric']][i], eval['metric_precision']) actual = round(float(eval_out), eval['metric_precision']) - result_str = 'expected: {0:.4f} actual: {1:.4f} - metric: {2:<8} model: {3}'.format(expected, actual, eval['metric'], model['name']) + result_str = 'expected: {0:.4f} actual: {1:.4f} - metric: {2:<8} model: {3} topics: {4}'.format(expected, actual, eval['metric'], model['name'], topic['id']) if is_close(expected, actual): logger.info(ok_str + result_str) else: @@ -281,7 +281,7 @@ def ranking_atom(cmd): verify_index(yaml_data, args.index, args.dry_run) if not args.no_retrieval: - logger.info('='*10+'Ranking'+'='*10) + logger.info('='*10 + ' Ranking ' + '='*10) run_cmds = construct_ranking_command(args.output_root, yaml_data, args.index) p = Pool(args.parallelism) p.map(ranking_atom, run_cmds) diff --git a/src/main/resources/regression/backgroundlinking18.yaml b/src/main/resources/regression/backgroundlinking18.yaml index 61977d1fce..5a355ffcb9 100644 --- a/src/main/resources/regression/backgroundlinking18.yaml +++ b/src/main/resources/regression/backgroundlinking18.yaml @@ -2,7 +2,7 @@ corpus: wapo.v2 corpus_path: collections/newswire/WashingtonPost.v2/data/ -index_path: indexes/lucene-index.wapo.v2.pos+docvectors+raw +index_path: indexes/lucene-index.wapo.v2 collection: WashingtonPostCollection generator: WashingtonPostGenerator threads: 1 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 595030 total terms: 318219870 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -c -M1000 -m ndcg_cut.5 +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -M1000 -m map separator: "\t" parse_index: 2 - metric: NCDG@5 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -c -M1000 -m map + - metric: nDCG@5 + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -M1000 -m ndcg_cut.5 separator: "\t" parse_index: 2 - metric: AP metric_precision: 4 can_combine: true topic_reader: BackgroundLinking +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2018 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking18.txt)" id: bglink18 @@ -43,28 +40,25 @@ topics: models: - name: bm25 display: BM25 - params: - - -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 + params: -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 results: - AP: + MAP: - 0.2490 - NCDG@5: + nDCG@5: - 0.3293 - name: bm25+rm3 display: +RM3 - params: - - -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 + params: -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 results: - AP: + MAP: - 0.2642 - NCDG@5: + nDCG@5: - 0.3526 - name: bm25+rm3+df display: +RM3+DF - params: - - -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 + params: -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 results: - AP: + MAP: - 0.2692 - NCDG@5: + nDCG@5: - 0.4171 diff --git a/src/main/resources/regression/backgroundlinking19.yaml b/src/main/resources/regression/backgroundlinking19.yaml index eeb1664872..ffaf0cdf12 100644 --- a/src/main/resources/regression/backgroundlinking19.yaml +++ b/src/main/resources/regression/backgroundlinking19.yaml @@ -2,7 +2,7 @@ corpus: wapo.v2 corpus_path: collections/newswire/WashingtonPost.v2/data/ -index_path: indexes/lucene-index.wapo.v2.pos+docvectors+raw +index_path: indexes/lucene-index.wapo.v2 collection: WashingtonPostCollection generator: WashingtonPostGenerator threads: 1 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 595030 total terms: 318219870 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -c -M1000 -m ndcg_cut.5 +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -M1000 -m map separator: "\t" parse_index: 2 - metric: NCDG@5 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -c -M1000 -m map + - metric: nDCG@5 + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -M1000 -m ndcg_cut.5 separator: "\t" parse_index: 2 - metric: AP metric_precision: 4 can_combine: true topic_reader: BackgroundLinking +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2019 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking19.txt)" id: bglink19 @@ -43,28 +40,25 @@ topics: models: - name: bm25 display: BM25 - params: - - -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 + params: -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 results: - AP: + MAP: - 0.3029 - NCDG@5: + nDCG@5: - 0.4785 - name: bm25+rm3 display: +RM3 - params: - - -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 + params: -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 results: - AP: + MAP: - 0.3786 - NCDG@5: + nDCG@5: - 0.5217 - name: bm25+rm3+df display: +RM3+DF - params: - - -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 + params: -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 results: - AP: + MAP: - 0.3154 - NCDG@5: + nDCG@5: - 0.5051 diff --git a/src/main/resources/regression/backgroundlinking20.yaml b/src/main/resources/regression/backgroundlinking20.yaml index 31274e46c4..7ad45ccaac 100644 --- a/src/main/resources/regression/backgroundlinking20.yaml +++ b/src/main/resources/regression/backgroundlinking20.yaml @@ -2,7 +2,7 @@ corpus: wapo.v3 corpus_path: collections/newswire/WashingtonPost.v3/data/ -index_path: indexes/lucene-index.wapo.v3.pos+docvectors+raw +index_path: indexes/lucene-index.wapo.v3 collection: WashingtonPostCollection generator: WashingtonPostGenerator threads: 1 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 671945 total terms: 366108177 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -c -M1000 -m ndcg_cut.5 +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -M1000 -m map separator: "\t" parse_index: 2 - metric: NCDG@5 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -c -M1000 -m map + - metric: nDCG@5 + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -M1000 -m ndcg_cut.5 separator: "\t" parse_index: 2 - metric: AP metric_precision: 4 can_combine: true topic_reader: BackgroundLinking +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2020 Topics](../src/main/resources/topics-and-qrels/topics.backgroundlinking20.txt)" id: bglink20 @@ -43,28 +40,25 @@ topics: models: - name: bm25 display: BM25 - params: - - -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 + params: -backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 results: - AP: + MAP: - 0.3286 - NCDG@5: + nDCG@5: - 0.5231 - name: bm25+rm3 display: +RM3 - params: - - -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 + params: -backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 results: - AP: + MAP: - 0.4519 - NCDG@5: + nDCG@5: - 0.5673 - name: bm25+rm3+df display: +RM3+DF - params: - - -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 + params: -backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 results: - AP: + MAP: - 0.3438 - NCDG@5: + nDCG@5: - 0.5316 diff --git a/src/main/resources/regression/cacm.yaml b/src/main/resources/regression/cacm.yaml index 8716191b38..1f521fee43 100644 --- a/src/main/resources/regression/cacm.yaml +++ b/src/main/resources/regression/cacm.yaml @@ -2,7 +2,7 @@ corpus: cacm corpus_path: src/main/resources/cacm/ -index_path: indexes/lucene-index.cacm.pos+docvectors +index_path: indexes/lucene-index.cacm collection: HtmlCollection generator: DefaultLuceneDocumentGenerator threads: 8 @@ -13,28 +13,25 @@ index_stats: unique terms: 14363 total terms: 320968 -search_command: target/appassembler/bin/SearchCollection -ranking_root: -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Cacm +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "CACM All" id: cacm @@ -43,58 +40,44 @@ topics: models: - name: bm25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.3123 - p30: + P30: - 0.1942 - name: bm25+rm3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.3643 - p30: + P30: - 0.2237 - name: bm25+ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.3077 - p30: + P30: - 0.1955 - name: ql - params: - - -qld + params: -qld results: - map: + MAP: - 0.3265 - p30: + P30: - 0.1942 - name: ql+rm3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.3620 - p30: + P30: - 0.2218 - name: ql+ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2907 - p30: + P30: - 0.1840 diff --git a/src/main/resources/regression/car17v1.5.yaml b/src/main/resources/regression/car17v1.5.yaml index 7d0da73eeb..8674645788 100644 --- a/src/main/resources/regression/car17v1.5.yaml +++ b/src/main/resources/regression/car17v1.5.yaml @@ -2,7 +2,7 @@ corpus: car-paragraphCorpus.v1.5 corpus_path: collections/car/paragraphCorpus.v1.5/ -index_path: indexes/lucene-index.car-paragraphCorpus.v1.5.pos+docvectors+raw +index_path: indexes/lucene-index.car-paragraphCorpus.v1.5 collection: CarCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 29674425 total terms: 1257909884 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -m map +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval - params: -m recip_rank + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval + params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: recip_rank metric_precision: 4 can_combine: true topic_reader: Car +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2017 CAR: benchmarkY1test (v1.5)](../src/main/resources/topics-and-qrels/topics.car17v1.5.benchmarkY1test.txt/)" id: car17v1.5 @@ -43,63 +40,49 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1562 - recip_rank: + MRR: - 0.2331 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.1295 - recip_rank: + MRR: - 0.1923 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1358 - recip_rank: + MRR: - 0.1949 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.1386 - recip_rank: + MRR: - 0.2037 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.1080 - recip_rank: + MRR: - 0.1599 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1048 - recip_rank: + MRR: - 0.1524 diff --git a/src/main/resources/regression/car17v2.0-doc2query.yaml b/src/main/resources/regression/car17v2.0-doc2query.yaml index 2890aff8b6..0d621dd0ed 100644 --- a/src/main/resources/regression/car17v2.0-doc2query.yaml +++ b/src/main/resources/regression/car17v2.0-doc2query.yaml @@ -2,7 +2,7 @@ corpus: car-paragraphCorpus.v2.0-doc2query corpus_path: collections/car/paragraphCorpus.v2.0-expanded-topk10/ -index_path: indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query.pos+docvectors+raw +index_path: indexes/lucene-index.car-paragraphCorpus.v2.0-doc2query collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 30 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 29794694 total terms: 2541082416 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: recip_rank metric_precision: 4 can_combine: true topic_reader: Car +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2017 CAR: benchmarkY1test (v2.0)](../src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt)" id: car17v2.0 @@ -43,63 +40,49 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1807 - recip_rank: + MRR: - 0.2750 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.1521 - recip_rank: + MRR: - 0.2275 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1470 - recip_rank: + MRR: - 0.2186 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.1752 - recip_rank: + MRR: - 0.2653 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.1453 - recip_rank: + MRR: - 0.2156 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1339 - recip_rank: + MRR: - 0.1981 diff --git a/src/main/resources/regression/car17v2.0.yaml b/src/main/resources/regression/car17v2.0.yaml index 98c64911a2..3c3a42f9ea 100644 --- a/src/main/resources/regression/car17v2.0.yaml +++ b/src/main/resources/regression/car17v2.0.yaml @@ -2,7 +2,7 @@ corpus: car-paragraphCorpus.v2.0 corpus_path: collections/car/paragraphCorpus.v2.0/ -index_path: indexes/lucene-index.car-paragraphCorpus.v2.0.pos+docvectors+raw +index_path: indexes/lucene-index.car-paragraphCorpus.v2.0 collection: CarCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 29791059 total terms: 1249754054 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: recip_rank metric_precision: 4 can_combine: true topic_reader: Car +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2017 CAR: benchmarkY1test (v2.0)](../src/main/resources/topics-and-qrels/topics.car17v2.0.benchmarkY1test.txt)" id: car17v2.0 @@ -43,63 +40,49 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1545 - recip_rank: + MRR: - 0.2321 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.1286 - recip_rank: + MRR: - 0.1927 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1364 - recip_rank: + MRR: - 0.1978 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.1371 - recip_rank: + MRR: - 0.2013 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.1080 - recip_rank: + MRR: - 0.1598 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1077 - recip_rank: + MRR: - 0.1588 diff --git a/src/main/resources/regression/clef06-fr.yaml b/src/main/resources/regression/clef06-fr.yaml index 893f0d620a..baefd5b76c 100644 --- a/src/main/resources/regression/clef06-fr.yaml +++ b/src/main/resources/regression/clef06-fr.yaml @@ -2,7 +2,7 @@ corpus: clef06-fr corpus_path: collections/newswire/clir/clef2006-fr.json -index_path: indexes/lucene-index.clef06-fr.pos+docvectors+raw +index_path: indexes/lucene-index.clef06-fr collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,38 +12,32 @@ index_stats: documents (non-empty): 171109 total terms: 34352833 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language fr -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.20 separator: "\t" parse_index: 2 - metric: p20 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.20 separator: "\t" parse_index: 2 - metric: ndcg20 metric_precision: 4 can_combine: true topic_reader: TsvString +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[CLEF 2006 (Monolingual French)](../src/main/resources/topics-and-qrels/topics.clef06fr.mono.fr.txt)" id: clef06fr @@ -53,12 +47,11 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 -language fr results: - map: + MAP: - 0.3111 - p20: + P20: - 0.3184 - ndcg20: + nDCG@20: - 0.4458 diff --git a/src/main/resources/regression/core17.yaml b/src/main/resources/regression/core17.yaml index 988784d1a5..6cdb026f9e 100644 --- a/src/main/resources/regression/core17.yaml +++ b/src/main/resources/regression/core17.yaml @@ -2,7 +2,7 @@ corpus: nyt corpus_path: collections/newswire/NYTcorpus/ -index_path: indexes/lucene-index.nyt.pos+docvectors+raw +index_path: indexes/lucene-index.nyt collection: NewYorkTimesCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 1855650 total terms: 751047962 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2017 Common Core Track Topics](../src/main/resources/topics-and-qrels/topics.core17.txt)" id: core17 @@ -43,63 +40,49 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2087 - p30: + P30: - 0.4293 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2823 - p30: + P30: - 0.5093 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2739 - p30: + P30: - 0.4940 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.2032 - p30: + P30: - 0.4467 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.2606 - p30: + P30: - 0.4827 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2579 - p30: + P30: - 0.4893 diff --git a/src/main/resources/regression/core18.yaml b/src/main/resources/regression/core18.yaml index e49f3f9d50..134dd2bc5e 100644 --- a/src/main/resources/regression/core18.yaml +++ b/src/main/resources/regression/core18.yaml @@ -2,7 +2,7 @@ corpus: wapo.v2 corpus_path: collections/newswire/WashingtonPost.v2/data/ -index_path: indexes/lucene-index.wapo.v2.pos+docvectors+raw +index_path: indexes/lucene-index.wapo.v2 collection: WashingtonPostCollection generator: WashingtonPostGenerator threads: 1 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 595030 total terms: 318219870 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2018 Common Core Track Topics](../src/main/resources/topics-and-qrels/topics.core18.txt)" id: core18 @@ -43,63 +40,49 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2496 - p30: + P30: - 0.3573 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.3139 - p30: + P30: - 0.4200 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2840 - p30: + P30: - 0.3947 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.2527 - p30: + P30: - 0.3653 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.3074 - p30: + P30: - 0.3993 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2920 - p30: + P30: - 0.4013 diff --git a/src/main/resources/regression/cw09b.yaml b/src/main/resources/regression/cw09b.yaml index 1e694841b5..457c548109 100644 --- a/src/main/resources/regression/cw09b.yaml +++ b/src/main/resources/regression/cw09b.yaml @@ -2,7 +2,7 @@ corpus: cw09b corpus_path: collections/web/ClueWeb09b/ -index_path: indexes/lucene-index.cw09b.pos+docvectors+raw +index_path: indexes/lucene-index.cw09b collection: ClueWeb09Collection generator: DefaultLuceneDocumentGenerator threads: 44 @@ -12,183 +12,167 @@ index_stats: documents (non-empty): 50220156 total terms: 31300822176 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true - - command: tools/eval/gdeval.pl + - metric: nDCG@20 + command: tools/eval/gdeval.pl separator: "," parse_index: -2 - metric: ndcg20 metric_precision: 5 - - command: tools/eval/gdeval.pl + - metric: ERR@20 + command: tools/eval/gdeval.pl separator: "," parse_index: -1 - metric: err20 metric_precision: 5 topic_reader: Webxml +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2010 Web Track (Topics 51-100)](../src/main/resources/topics-and-qrels/topics.web.51-100.txt)" + id: trec2010 path: topics.web.51-100.txt qrel: qrels.web.51-100.txt - name: "[TREC 2011 Web Track (Topics 101-150)](../src/main/resources/topics-and-qrels/topics.web.101-150.txt)" + id: trec2011 path: topics.web.101-150.txt qrel: qrels.web.101-150.txt - name: "[TREC 2012 Web Track (Topics 151-200)](../src/main/resources/topics-and-qrels/topics.web.151-200.txt)" + id: trec2012 path: topics.web.151-200.txt qrel: qrels.web.151-200.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1126 - 0.1094 - 0.1106 - p30: + P30: - 0.2681 - 0.2513 - 0.2167 - ndcg20: + nDCG@20: - 0.13509 - 0.18944 - 0.10145 - err20: + ERR@20: - 0.07330 - 0.09592 - 0.13043 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.0931 - 0.1085 - 0.1108 - p30: + P30: - 0.2382 - 0.2487 - 0.1927 - ndcg20: + nDCG@20: - 0.13683 - 0.19153 - 0.09183 - err20: + ERR@20: - 0.07469 - 0.09590 - 0.14937 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -axiom.beta 0.1 - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 results: - map: + MAP: - 0.0961 - 0.0986 - 0.1356 - p30: + P30: - 0.2535 - 0.2367 - 0.2547 - ndcg20: + nDCG@20: - 0.17665 - 0.18536 - 0.13878 - err20: + ERR@20: - 0.10191 - 0.09502 - 0.23994 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.1060 - 0.0959 - 0.1070 - p30: + P30: - 0.2438 - 0.2147 - 0.2080 - ndcg20: + nDCG@20: - 0.11431 - 0.16311 - 0.08755 - err20: + ERR@20: - 0.05994 - 0.08502 - 0.13063 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.1019 - 0.0839 - 0.1058 - p30: + P30: - 0.2312 - 0.2053 - 0.1980 - ndcg20: + nDCG@20: - 0.11824 - 0.14488 - 0.08958 - err20: + ERR@20: - 0.05918 - 0.07872 - 0.13332 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -axiom.beta 0.1 - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 results: - map: + MAP: - 0.1088 - 0.0860 - 0.1224 - p30: + P30: - 0.2625 - 0.2120 - 0.2220 - ndcg20: + nDCG@20: - 0.14950 - 0.15366 - 0.10911 - err20: + ERR@20: - 0.07515 - 0.08610 - 0.15644 diff --git a/src/main/resources/regression/cw12.yaml b/src/main/resources/regression/cw12.yaml index 640621bb45..2015c2a675 100644 --- a/src/main/resources/regression/cw12.yaml +++ b/src/main/resources/regression/cw12.yaml @@ -2,7 +2,7 @@ corpus: cw12 corpus_path: collections/web/ClueWeb12/ -index_path: indexes/lucene-index.cw12.pos+docvectors+raw +index_path: indexes/lucene-index.cw12 collection: ClueWeb12Collection generator: DefaultLuceneDocumentGenerator threads: 44 @@ -12,115 +12,107 @@ index_stats: documents (non-empty): 731542236 total terms: 429234508918 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true - - command: tools/eval/gdeval.pl + - metric: nDCG@20 + command: tools/eval/gdeval.pl separator: "," parse_index: -2 - metric: ndcg20 metric_precision: 5 - - command: tools/eval/gdeval.pl + - metric: ERR@20 + command: tools/eval/gdeval.pl separator: "," parse_index: -1 - metric: err20 metric_precision: 5 topic_reader: Webxml +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2013 Web Track (Topics 201-250)](../src/main/resources/topics-and-qrels/topics.web.201-250.txt)" + id: trec2013 path: topics.web.201-250.txt qrel: qrels.web.201-250.txt - name: "[TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)" + id: trec2014 path: topics.web.251-300.txt qrel: qrels.web.251-300.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1695 - 0.2470 - p30: + P30: - 0.2767 - 0.4547 - ndcg20: + nDCG@20: - 0.20848 - 0.25720 - err20: + ERR@20: - 0.12829 - 0.16163 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.1465 - 0.2330 - p30: + P30: - 0.2393 - 0.4080 - ndcg20: + nDCG@20: - 0.20325 - 0.25163 - err20: + ERR@20: - 0.12645 - 0.16518 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.1493 - 0.2467 - p30: + P30: - 0.2607 - 0.4380 - ndcg20: + nDCG@20: - 0.19935 - 0.22201 - err20: + ERR@20: - 0.12325 - 0.13234 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.1290 - 0.2178 - p30: + P30: - 0.2347 - 0.3813 - ndcg20: + nDCG@20: - 0.17248 - 0.20926 - err20: + ERR@20: - 0.10073 - 0.12492 - diff --git a/src/main/resources/regression/cw12b13.yaml b/src/main/resources/regression/cw12b13.yaml index 6245aa4e0a..5858520721 100644 --- a/src/main/resources/regression/cw12b13.yaml +++ b/src/main/resources/regression/cw12b13.yaml @@ -2,7 +2,7 @@ corpus: cw12b13 corpus_path: collections/web/ClueWeb12-B13/ -index_path: indexes/lucene-index.cw12b13.pos+docvectors+raw +index_path: indexes/lucene-index.cw12b13 collection: ClueWeb12Collection generator: DefaultLuceneDocumentGenerator threads: 44 @@ -12,156 +12,139 @@ index_stats: documents (non-empty): 52237520 total terms: 30660015721 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true - - command: tools/eval/gdeval.pl + - metric: nDCG@20 + command: tools/eval/gdeval.pl separator: "," parse_index: -2 - metric: ndcg20 metric_precision: 5 - - command: tools/eval/gdeval.pl + - metric: ERR@20 + command: tools/eval/gdeval.pl separator: "," parse_index: -1 - metric: err20 metric_precision: 5 topic_reader: Webxml +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2013 Web Track (Topics 201-250)](../src/main/resources/topics-and-qrels/topics.web.201-250.txt)" + id: trec2013 path: topics.web.201-250.txt qrel: qrels.web.201-250.txt - name: "[TREC 2014 Web Track (Topics 251-300)](../src/main/resources/topics-and-qrels/topics.web.251-300.txt)" + id: trec2014 path: topics.web.251-300.txt qrel: qrels.web.251-300.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.0468 - 0.0224 - p30: + P30: - 0.2107 - 0.1273 - ndcg20: + nDCG@20: - 0.12887 - 0.11831 - err20: + ERR@20: - 0.08377 - 0.11980 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.0408 - 0.0210 - p30: + P30: - 0.1673 - 0.1207 - ndcg20: + nDCG@20: - 0.11139 - 0.10754 - err20: + ERR@20: - 0.07525 - 0.10551 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -axiom.beta 0.1 - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 results: - map: + MAP: - 0.0432 - 0.0181 - p30: + P30: - 0.1780 - 0.1107 - ndcg20: + nDCG@20: - 0.13111 - 0.09735 - err20: + ERR@20: - 0.09489 - 0.09246 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.0397 - 0.0235 - p30: + P30: - 0.1773 - 0.1373 - ndcg20: + nDCG@20: - 0.11038 - 0.11762 - err20: + ERR@20: - 0.07674 - 0.10909 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.0322 - 0.0203 - p30: + P30: - 0.1513 - 0.1173 - ndcg20: + nDCG@20: - 0.09211 - 0.10036 - err20: + ERR@20: - 0.05522 - 0.09284 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -axiom.beta 0.1 - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -axiom.beta 0.1 -rerankCutoff 20 results: - map: + MAP: - 0.0356 - 0.0179 - p30: + P30: - 0.1567 - 0.1147 - ndcg20: + nDCG@20: - 0.11128 - 0.09844 - err20: + ERR@20: - 0.07195 - 0.08795 diff --git a/src/main/resources/regression/disk12.yaml b/src/main/resources/regression/disk12.yaml index 5f1d83fd82..e29e761c76 100644 --- a/src/main/resources/regression/disk12.yaml +++ b/src/main/resources/regression/disk12.yaml @@ -2,7 +2,7 @@ corpus: disk12 corpus_path: collections/newswire/disk12/ -index_path: indexes/lucene-index.disk12.pos+docvectors+raw +index_path: indexes/lucene-index.disk12 collection: TrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 741675 total terms: 217199384 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC-1 Ad Hoc Topics 51-100](../src/main/resources/topics-and-qrels/topics.adhoc.51-100.txt)" id: trec1 @@ -51,87 +48,73 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2277 - 0.2003 - 0.2634 - p30: + P30: - 0.4540 - 0.4253 - 0.4860 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2628 - 0.2578 - 0.3345 - p30: + P30: - 0.4860 - 0.4580 - 0.5260 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2648 - 0.2698 - 0.3407 - p30: + P30: - 0.5127 - 0.4720 - 0.5273 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.2188 - 0.2010 - 0.2580 - p30: + P30: - 0.4553 - 0.4193 - 0.4753 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.2465 - 0.2429 - 0.3037 - p30: + P30: - 0.4680 - 0.4400 - 0.4967 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2502 - 0.2596 - 0.3129 - p30: + P30: - 0.4947 - 0.4760 - 0.5187 diff --git a/src/main/resources/regression/disk45.yaml b/src/main/resources/regression/disk45.yaml index fd98e640ed..bc66d57d22 100644 --- a/src/main/resources/regression/disk45.yaml +++ b/src/main/resources/regression/disk45.yaml @@ -2,7 +2,7 @@ corpus: disk45 corpus_path: collections/newswire/disk45/ -index_path: indexes/lucene-index.disk45.pos+docvectors+raw +index_path: indexes/lucene-index.disk45 collection: TrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,28 +12,25 @@ index_stats: documents (non-empty): 528030 total terms: 174540872 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC-7 Ad Hoc Topics](../src/main/resources/topics-and-qrels/topics.adhoc.351-400.txt)" id: trec7 @@ -51,87 +48,73 @@ topics: models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1862 - 0.2515 - 0.2531 - p30: + P30: - 0.3093 - 0.3560 - 0.3102 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2354 - 0.2750 - 0.2903 - p30: + P30: - 0.3447 - 0.3760 - 0.3365 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2431 - 0.2812 - 0.2896 - p30: + P30: - 0.3287 - 0.3753 - 0.3333 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.1843 - 0.2460 - 0.2467 - p30: + P30: - 0.3073 - 0.3480 - 0.3079 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.2168 - 0.2702 - 0.2747 - p30: + P30: - 0.3307 - 0.3680 - 0.3232 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2298 - 0.2647 - 0.2774 - p30: + P30: - 0.3193 - 0.3500 - 0.3229 diff --git a/src/main/resources/regression/dl19-doc-docTTTTTquery-per-doc.yaml b/src/main/resources/regression/dl19-doc-docTTTTTquery-per-doc.yaml index 6104bb2d5b..d6cb2b99ff 100644 --- a/src/main/resources/regression/dl19-doc-docTTTTTquery-per-doc.yaml +++ b/src/main/resources/regression/dl19-doc-docTTTTTquery-per-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-doc corpus_path: collections/msmarco/doc-docTTTTTquery-per-doc -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 3213834 total terms: 3748332076 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)" id: dl19 @@ -51,55 +47,41 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2699 - NDCG@10: + nDCG@10: - 0.5968 R@100: - 0.4198 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -rm3 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.3044 - NDCG@10: + nDCG@10: - 0.5895 R@100: - 0.4465 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 4.68 - - -bm25.b 0.87 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25.k1 4.68 -bm25.b 0.87 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2620 - NDCG@10: + nDCG@10: - 0.5967 R@100: - 0.3992 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 4.68 - - -bm25.b 0.87 - - -rm3 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25.k1 4.68 -bm25.b 0.87 -rm3 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2812 - NDCG@10: + nDCG@10: - 0.6075 R@100: - 0.4119 \ No newline at end of file diff --git a/src/main/resources/regression/dl19-doc-docTTTTTquery-per-passage.yaml b/src/main/resources/regression/dl19-doc-docTTTTTquery-per-passage.yaml index 1014baad3e..ea368bb59f 100644 --- a/src/main/resources/regression/dl19-doc-docTTTTTquery-per-passage.yaml +++ b/src/main/resources/regression/dl19-doc-docTTTTTquery-per-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-passage corpus_path: collections/msmarco/doc-docTTTTTquery-per-passage -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20544550 total terms: 4203956960 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)" id: dl19 @@ -51,54 +47,41 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2791 - NDCG@10: + nDCG@10: - 0.6099 R@100: - 0.4092 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3025 - NDCG@10: + nDCG@10: - 0.6318 R@100: - 0.4394 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.56 - - -bm25.b 0.59 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2655 - NDCG@10: + nDCG@10: - 0.6271 R@100: - 0.4020 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.56 - - -bm25.b 0.59 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.56 -bm25.b 0.59 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2895 - NDCG@10: + nDCG@10: - 0.6256 R@100: - 0.4235 \ No newline at end of file diff --git a/src/main/resources/regression/dl19-doc-per-passage.yaml b/src/main/resources/regression/dl19-doc-per-passage.yaml index a2377ce5c0..ff12ba504f 100644 --- a/src/main/resources/regression/dl19-doc-per-passage.yaml +++ b/src/main/resources/regression/dl19-doc-per-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-per-passage corpus_path: collections/msmarco/doc-per-passage/ -index_path: indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-per-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20544550 total terms: 3197886407 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)" id: dl19 @@ -51,114 +47,81 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2441 - NDCG@10: + nDCG@10: - 0.5276 R@100: - 0.3840 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2880 - NDCG@10: + nDCG@10: - 0.5750 R@100: - 0.4356 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3015 - NDCG@10: + nDCG@10: - 0.5590 R@100: - 0.4501 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2821 - NDCG@10: + nDCG@10: - 0.5591 R@100: - 0.4477 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2394 - NDCG@10: + nDCG@10: - 0.5364 R@100: - 0.3903 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2656 - NDCG@10: + nDCG@10: - 0.5379 R@100: - 0.4126 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2934 - NDCG@10: + nDCG@10: - 0.5546 R@100: - 0.4437 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.2838 - NDCG@10: + nDCG@10: - 0.5478 R@100: - 0.4362 \ No newline at end of file diff --git a/src/main/resources/regression/dl19-doc.yaml b/src/main/resources/regression/dl19-doc.yaml index 230eeb8c40..8fb6dd4b90 100644 --- a/src/main/resources/regression/dl19-doc.yaml +++ b/src/main/resources/regression/dl19-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc corpus_path: collections/msmarco/doc/ -index_path: indexes/lucene-index.msmarco-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 3213835 total terms: 2748636047 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL19 (Doc)](https://trec.nist.gov/data/deep2019.html)" id: dl19 @@ -51,115 +47,81 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2443 - NDCG@10: + nDCG@10: - 0.5190 R@100: - 0.3948 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -rm3 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2772 - NDCG@10: + nDCG@10: - 0.5169 R@100: - 0.4189 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2452 - NDCG@10: + nDCG@10: - 0.4730 R@100: - 0.3945 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25prf -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2541 - NDCG@10: + nDCG@10: - 0.5105 R@100: - 0.4004 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2318 - NDCG@10: + nDCG@10: - 0.5140 R@100: - 0.3862 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -rm3 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2700 - NDCG@10: + nDCG@10: - 0.5485 R@100: - 0.4193 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -axiom -axiom.deterministic -rerankCutoff 20 -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2816 - NDCG@10: + nDCG@10: - 0.5245 R@100: - 0.4399 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -bm25prf - - -hits 100 # Note, this is different DL 2019 passage ranking! + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -bm25prf -hits 100 # Note, this is different DL 2019 passage ranking! results: - map: + MAP: - 0.2758 - NDCG@10: + nDCG@10: - 0.5280 R@100: - 0.4287 diff --git a/src/main/resources/regression/dl19-passage-docTTTTTquery.yaml b/src/main/resources/regression/dl19-passage-docTTTTTquery.yaml index ac288c86c1..8460a59aeb 100644 --- a/src/main/resources/regression/dl19-passage-docTTTTTquery.yaml +++ b/src/main/resources/regression/dl19-passage-docTTTTTquery.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage-docTTTTTquery corpus_path: collections/msmarco/passage-docTTTTTquery -index_path: indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage-docTTTTTquery collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 8841823 total terms: 1986612263 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map -c -l 2 separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m recall.1000 -c -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.10 -c separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" id: dl19 @@ -51,78 +47,61 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.4034 - NDCG@10: + nDCG@10: - 0.6417 R@1000: - 0.8310 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.4485 - NDCG@10: + nDCG@10: - 0.6548 R@1000: - 0.8861 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.4052 - NDCG@10: + nDCG@10: - 0.6482 R@1000: - 0.8269 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.4520 - NDCG@10: + nDCG@10: - 0.6614 R@1000: - 0.8826 - name: bm25-tuned2 display: BM25 (tuned2) - params: - - -bm25 - - -bm25.k1 2.18 - - -bm25.b 0.86 + params: -bm25 -bm25.k1 2.18 -bm25.b 0.86 results: - map: + MAP: - 0.4046 - NDCG@10: + nDCG@10: - 0.6336 R@1000: - 0.8134 - name: bm25-tuned2+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.18 - - -bm25.b 0.86 - - -rm3 + params: -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 results: - map: + MAP: - 0.4360 - NDCG@10: + nDCG@10: - 0.6528 R@1000: - 0.8424 \ No newline at end of file diff --git a/src/main/resources/regression/dl19-passage.yaml b/src/main/resources/regression/dl19-passage.yaml index daca4e505b..d04a971b62 100644 --- a/src/main/resources/regression/dl19-passage.yaml +++ b/src/main/resources/regression/dl19-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage corpus_path: collections/msmarco/passage/ -index_path: indexes/lucene-index.msmarco-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 8841823 total terms: 352316036 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map -c -l 2 separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m recall.1000 -c -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.10 -c separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" id: dl19 @@ -51,107 +47,81 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.3013 - NDCG@10: + nDCG@10: - 0.5058 R@1000: - 0.7501 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.3390 - NDCG@10: + nDCG@10: - 0.5180 R@1000: - 0.7998 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.3745 - NDCG@10: + nDCG@10: - 0.5511 R@1000: - 0.8241 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: - map: + MAP: - 0.3561 - NDCG@10: + nDCG@10: - 0.5372 R@1000: - 0.7929 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.2903 - NDCG@10: + nDCG@10: - 0.4973 R@1000: - 0.7450 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.3377 - NDCG@10: + nDCG@10: - 0.5231 R@1000: - 0.7792 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.3632 - NDCG@10: + nDCG@10: - 0.5461 R@1000: - 0.8138 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -bm25prf + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf results: - map: + MAP: - 0.3684 - NDCG@10: + nDCG@10: - 0.5536 R@1000: - 0.7988 \ No newline at end of file diff --git a/src/main/resources/regression/dl20-doc-docTTTTTquery-per-doc.yaml b/src/main/resources/regression/dl20-doc-docTTTTTquery-per-doc.yaml index f2ae7926f9..ce724efa74 100644 --- a/src/main/resources/regression/dl20-doc-docTTTTTquery-per-doc.yaml +++ b/src/main/resources/regression/dl20-doc-docTTTTTquery-per-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-doc corpus_path: collections/msmarco/doc-docTTTTTquery-per-doc -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 3213834 total terms: 3748332076 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: RR metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)" id: dl20 @@ -58,63 +54,49 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.4230 - NDCG@10: + nDCG@10: - 0.5885 - RR: + MRR: - 0.9369 R@100: - 0.6412 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -rm3 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.4228 - NDCG@10: + nDCG@10: - 0.5407 - RR: + MRR: - 0.8147 R@100: - 0.6555 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 4.68 - - -bm25.b 0.87 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -bm25.k1 4.68 -bm25.b 0.87 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.4098 - NDCG@10: + nDCG@10: - 0.5852 - RR: + MRR: - 0.9439 R@100: - 0.6178 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 4.68 - - -bm25.b 0.87 - - -rm3 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -bm25.k1 4.68 -bm25.b 0.87 -rm3 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.4104 - NDCG@10: + nDCG@10: - 0.5743 - RR: + MRR: - 0.8701 R@100: - 0.6127 diff --git a/src/main/resources/regression/dl20-doc-docTTTTTquery-per-passage.yaml b/src/main/resources/regression/dl20-doc-docTTTTTquery-per-passage.yaml index f9f927804a..e764499f03 100644 --- a/src/main/resources/regression/dl20-doc-docTTTTTquery-per-passage.yaml +++ b/src/main/resources/regression/dl20-doc-docTTTTTquery-per-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-passage corpus_path: collections/msmarco/doc-docTTTTTquery-per-passage -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 20544550 total terms: 4203956960 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: RR metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)" id: dl20 @@ -58,62 +54,49 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.4150 - NDCG@10: + nDCG@10: - 0.5957 - RR: + MRR: - 0.9361 R@100: - 0.6201 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.4269 - NDCG@10: + nDCG@10: - 0.5848 - RR: + MRR: - 0.8944 R@100: - 0.6443 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.56 - - -bm25.b 0.59 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.4042 - NDCG@10: + nDCG@10: - 0.5931 - RR: + MRR: - 0.9469 R@100: - 0.6192 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.56 - - -bm25.b 0.59 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.56 -bm25.b 0.59 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.4023 - NDCG@10: + nDCG@10: - 0.5723 - RR: + MRR: - 0.9150 R@100: - 0.6392 \ No newline at end of file diff --git a/src/main/resources/regression/dl20-doc-per-passage.yaml b/src/main/resources/regression/dl20-doc-per-passage.yaml index 4cee076149..b39ef290b2 100644 --- a/src/main/resources/regression/dl20-doc-per-passage.yaml +++ b/src/main/resources/regression/dl20-doc-per-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-per-passage corpus_path: collections/msmarco/doc-per-passage/ -index_path: indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-per-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 20544550 total terms: 3197886407 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: RR metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)" id: dl20 @@ -58,130 +54,97 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3584 - NDCG@10: + nDCG@10: - 0.5271 - RR: + MRR: - 0.8479 R@100: - 0.5823 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3769 - NDCG@10: + nDCG@10: - 0.5159 - RR: + MRR: - 0.8136 R@100: - 0.6224 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3854 - NDCG@10: + nDCG@10: - 0.5250 - RR: + MRR: - 0.8123 R@100: - 0.6332 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3672 - NDCG@10: + nDCG@10: - 0.5217 - RR: + MRR: - 0.7911 R@100: - 0.5994 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3456 - NDCG@10: + nDCG@10: - 0.5213 - RR: + MRR: - 0.8684 R@100: - 0.5715 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3471 - NDCG@10: + nDCG@10: - 0.4983 - RR: + MRR: - 0.7807 R@100: - 0.6013 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3495 - NDCG@10: + nDCG@10: - 0.4942 - RR: + MRR: - 0.8102 R@100: - 0.6086 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 100 results: - map: + MAP: - 0.3629 - NDCG@10: + nDCG@10: - 0.5260 - RR: + MRR: - 0.8478 R@100: - 0.6064 \ No newline at end of file diff --git a/src/main/resources/regression/dl20-doc.yaml b/src/main/resources/regression/dl20-doc.yaml index e959b5319e..4079f46807 100644 --- a/src/main/resources/regression/dl20-doc.yaml +++ b/src/main/resources/regression/dl20-doc.yaml @@ -2,7 +2,7 @@ corpus: msmacro-doc corpus_path: collections/msmarco/doc/ -index_path: indexes/lucene-index.msmarco-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 3213835 total terms: 2748636047 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: RR metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL20 (Doc)](https://trec.nist.gov/data/deep2020.html)" id: dl20 @@ -58,96 +54,73 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.3791 - NDCG@10: + nDCG@10: - 0.5271 - RR: + MRR: - 0.8521 R@100: - 0.6110 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -rm3 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.4006 - NDCG@10: + nDCG@10: - 0.5248 - RR: + MRR: - 0.8541 R@100: - 0.6392 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.3630 - NDCG@10: + nDCG@10: - 0.5087 - RR: + MRR: - 0.8641 R@100: - 0.5926 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -rm3 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.3588 - NDCG@10: + nDCG@10: - 0.5117 - RR: + MRR: - 0.8188 R@100: - 0.5983 - name: bm25-tuned2 display: BM25 (tuned2) - params: - - -bm25 - - -bm25.k1 4.46 - - -bm25.b 0.82 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -bm25.k1 4.46 -bm25.b 0.82 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.3583 - NDCG@10: + nDCG@10: - 0.5078 - RR: + MRR: - 0.8541 R@100: - 0.5860 - name: bm25-tuned2+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 4.46 - - -bm25.b 0.82 - - -rm3 - - -hits 100 # Note, this is different DL 2020 passage ranking! + params: -bm25 -bm25.k1 4.46 -bm25.b 0.82 -rm3 -hits 100 # Note, this is different DL 2020 passage ranking! results: - map: + MAP: - 0.3618 - NDCG@10: + nDCG@10: - 0.5202 - RR: + MRR: - 0.8458 R@100: - 0.5998 diff --git a/src/main/resources/regression/dl20-passage-docTTTTTquery.yaml b/src/main/resources/regression/dl20-passage-docTTTTTquery.yaml index bc0038e1ac..392a200743 100644 --- a/src/main/resources/regression/dl20-passage-docTTTTTquery.yaml +++ b/src/main/resources/regression/dl20-passage-docTTTTTquery.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage-docTTTTTquery corpus_path: collections/msmarco/passage-docTTTTTquery -index_path: indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage-docTTTTTquery collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 8841823 total terms: 1986612263 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map -l 2 separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank -l 2 separator: "\t" parse_index: 2 - metric: RR metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 -l 2 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" id: dl20 @@ -65,14 +61,13 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.4074 - NDCG@10: + nDCG@10: - 0.6187 - RR: + MRR: - 0.7326 R@100: - 0.7044 @@ -80,15 +75,13 @@ models: - 0.8452 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.4295 - NDCG@10: + nDCG@10: - 0.6172 - RR: + MRR: - 0.7424 R@100: - 0.7153 @@ -96,16 +89,13 @@ models: - 0.8699 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.4082 - NDCG@10: + nDCG@10: - 0.6192 - RR: + MRR: - 0.7425 R@100: - 0.7046 @@ -113,17 +103,13 @@ models: - 0.8443 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.4296 - NDCG@10: + nDCG@10: - 0.6177 - RR: + MRR: - 0.7422 R@100: - 0.7143 @@ -131,16 +117,13 @@ models: - 0.8692 - name: bm25-tuned2 display: BM25 (tuned2) - params: - - -bm25 - - -bm25.k1 2.18 - - -bm25.b 0.86 + params: -bm25 -bm25.k1 2.18 -bm25.b 0.86 results: - map: + MAP: - 0.4171 - NDCG@10: + nDCG@10: - 0.6265 - RR: + MRR: - 0.7467 R@100: - 0.7044 @@ -148,17 +131,13 @@ models: - 0.8393 - name: bm25-tuned2+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.18 - - -bm25.b 0.86 - - -rm3 + params: -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 results: - map: + MAP: - 0.4347 - NDCG@10: + nDCG@10: - 0.6232 - RR: + MRR: - 0.7327 R@100: - 0.7109 diff --git a/src/main/resources/regression/dl20-passage.yaml b/src/main/resources/regression/dl20-passage.yaml index 87b9d16ef7..0c78f48721 100644 --- a/src/main/resources/regression/dl20-passage.yaml +++ b/src/main/resources/regression/dl20-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage corpus_path: collections/msmarco/passage/ -index_path: indexes/lucene-index.msmarco-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 8841823 total terms: 352316036 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map -l 2 separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank -l 2 separator: "\t" parse_index: 2 - metric: RR metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 -l 2 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" id: dl20 @@ -65,14 +61,13 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2856 - NDCG@10: + nDCG@10: - 0.4796 - RR: + MRR: - 0.6585 R@100: - 0.5599 @@ -80,15 +75,13 @@ models: - 0.7863 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.3019 - NDCG@10: + nDCG@10: - 0.4821 - RR: + MRR: - 0.6360 R@100: - 0.6046 @@ -96,17 +89,13 @@ models: - 0.8217 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.3240 - NDCG@10: + nDCG@10: - 0.4834 - RR: + MRR: - 0.6096 R@100: - 0.6428 @@ -114,15 +103,13 @@ models: - 0.8483 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: - map: + MAP: - 0.3117 - NDCG@10: + nDCG@10: - 0.4721 - RR: + MRR: - 0.6157 R@100: - 0.5783 @@ -130,16 +117,13 @@ models: - 0.8074 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.2876 - NDCG@10: + nDCG@10: - 0.4876 - RR: + MRR: - 0.6594 R@100: - 0.5669 @@ -147,17 +131,13 @@ models: - 0.8031 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.3056 - NDCG@10: + nDCG@10: - 0.4808 - RR: + MRR: - 0.6278 R@100: - 0.6333 @@ -165,19 +145,13 @@ models: - 0.8286 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.3322 - NDCG@10: + nDCG@10: - 0.5027 - RR: + MRR: - 0.6328 R@100: - 0.6468 @@ -185,17 +159,13 @@ models: - 0.8455 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -bm25prf + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf results: - map: + MAP: - 0.3136 - NDCG@10: + nDCG@10: - 0.4788 - RR: + MRR: - 0.6252 R@100: - 0.5782 diff --git a/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml index d9eacf7b7e..9eaa4375f9 100644 --- a/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 124131404 total terms: 805830282591 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP@100 metric_precision: 4 can_combine: no - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)" id: dl21 @@ -65,16 +61,13 @@ topics: models: - name: unicoil-noexp-0shot display: uniCOIL (no expansion, zero-shot) - params: - - -impact - - -pretokenized - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: MAP@100: - 0.2475 MRR@100: - 0.9122 - NDCG@10: + nDCG@10: - 0.6282 R@100: - 0.3497 diff --git a/src/main/resources/regression/dl21-doc-segmented.yaml b/src/main/resources/regression/dl21-doc-segmented.yaml index 2211b46236..8463e56486 100644 --- a/src/main/resources/regression/dl21-doc-segmented.yaml +++ b/src/main/resources/regression/dl21-doc-segmented.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-doc-segmented corpus_path: collections/msmarco/msmarco_v2_doc_segmented -index_path: indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-doc-segmented collection: MsMarcoV2DocCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 124131414 total terms: 24780915974 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP@100 metric_precision: 4 can_combine: no - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)" id: dl21 @@ -65,15 +61,13 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 - - -bm25 + params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 results: MAP@100: - 0.2436 MRR@100: - 0.8937 - NDCG@10: + nDCG@10: - 0.5776 R@100: - 0.3478 @@ -81,16 +75,13 @@ models: - 0.6930 - name: bm25-default+rm3 display: +RM3 - params: - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 - - -bm25 - - -rm3 + params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -rm3 results: MAP@100: - 0.2933 MRR@100: - 0.9018 - NDCG@10: + nDCG@10: - 0.6185 R@100: - 0.3892 @@ -98,18 +89,13 @@ models: - 0.7694 - name: bm25-default+ax display: +Ax - params: - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: MAP@100: - 0.2808 MRR@100: - 0.9221 - NDCG@10: + nDCG@10: - 0.5840 R@100: - 0.3884 @@ -117,16 +103,13 @@ models: - 0.7934 - name: bm25-default+prf display: +PRF - params: - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 - - -bm25 - - -bm25prf + params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -bm25 -bm25prf results: MAP@100: - 0.2729 MRR@100: - 0.9146 - NDCG@10: + nDCG@10: - 0.5936 R@100: - 0.3778 diff --git a/src/main/resources/regression/dl21-doc.yaml b/src/main/resources/regression/dl21-doc.yaml index e53b2cdc2e..e8f534ae13 100644 --- a/src/main/resources/regression/dl21-doc.yaml +++ b/src/main/resources/regression/dl21-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-doc corpus_path: collections/msmarco/msmarco_v2_doc -index_path: indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-doc collection: MsMarcoV2DocCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 11959635 total terms: 14165661202 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP@100 metric_precision: 4 can_combine: no - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL21 (Doc)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)" id: dl21 @@ -65,15 +61,13 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -hits 1000 - - -bm25 + params: -hits 1000 -bm25 results: MAP@100: - 0.2126 MRR@100: - 0.8367 - NDCG@10: + nDCG@10: - 0.5116 R@100: - 0.3195 @@ -81,16 +75,13 @@ models: - 0.6739 - name: bm25-default+rm3 display: +RM3 - params: - - -hits 1000 - - -bm25 - - -rm3 + params: -hits 1000 -bm25 -rm3 results: MAP@100: - 0.2453 MRR@100: - 0.7994 - NDCG@10: + nDCG@10: - 0.5339 R@100: - 0.3374 @@ -98,18 +89,13 @@ models: - 0.7335 - name: bm25-default+ax display: +Ax - params: - - -hits 1000 - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -hits 1000 -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: MAP@100: - 0.2034 MRR@100: - 0.7434 - NDCG@10: + nDCG@10: - 0.4804 R@100: - 0.3002 @@ -117,16 +103,13 @@ models: - 0.7089 - name: bm25-default+prf display: +PRF - params: - - -hits 1000 - - -bm25 - - -bm25prf + params: -hits 1000 -bm25 -bm25prf results: MAP@100: - 0.2079 MRR@100: - 0.7869 - NDCG@10: + nDCG@10: - 0.4850 R@100: - 0.3096 diff --git a/src/main/resources/regression/dl21-passage-augmented.yaml b/src/main/resources/regression/dl21-passage-augmented.yaml index 4e497f4a70..5addaf1f14 100644 --- a/src/main/resources/regression/dl21-passage-augmented.yaml +++ b/src/main/resources/regression/dl21-passage-augmented.yaml @@ -1,8 +1,8 @@ --- corpus: msmarco-v2-passage-augmented -corpus_path: collections/msmarco/msmarco_v2_passage +corpus_path: collections/msmarco/msmarco_v2_passage_augmented -index_path: indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-passage-augmented collection: MsMarcoV2PassageCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 138364198 total terms: 15272964956 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map -l 2 separator: "\t" parse_index: 2 - metric: MAP@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank -l 2 separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 -l 2 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)" id: dl21 @@ -65,14 +61,13 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: MAP@100: - 0.0977 MRR@100: - 0.5303 - NDCG@10: + nDCG@10: - 0.3977 R@100: - 0.2709 @@ -80,15 +75,13 @@ models: - 0.5835 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: MAP@100: - 0.1050 MRR@100: - 0.4906 - NDCG@10: + nDCG@10: - 0.3906 R@100: - 0.2795 @@ -96,17 +89,13 @@ models: - 0.6268 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: MAP@100: - 0.0849 MRR@100: - 0.4269 - NDCG@10: + nDCG@10: - 0.2927 R@100: - 0.2645 @@ -114,15 +103,13 @@ models: - 0.6085 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: MAP@100: - 0.0959 MRR@100: - 0.4737 - NDCG@10: + nDCG@10: - 0.3488 R@100: - 0.2729 diff --git a/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml index 8500d2f83a..34c72c04bb 100644 --- a/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 138364198 total terms: 411330032512 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map -l 2 separator: "\t" parse_index: 2 - metric: MAP@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank -l 2 separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 -l 2 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)" id: dl21 @@ -65,15 +61,13 @@ topics: models: - name: unicoil-noexp-0shot display: uniCOIL (no expansion, zero-shot) - params: - - -impact - - -pretokenized + params: -impact -pretokenized results: MAP@100: - 0.2193 MRR@100: - 0.6991 - NDCG@10: + nDCG@10: - 0.5756 R@100: - 0.4246 diff --git a/src/main/resources/regression/dl21-passage.yaml b/src/main/resources/regression/dl21-passage.yaml index 0f1de9573b..dd2c63a65f 100644 --- a/src/main/resources/regression/dl21-passage.yaml +++ b/src/main/resources/regression/dl21-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-passage corpus_path: collections/msmarco/msmarco_v2_passage -index_path: indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-passage collection: MsMarcoV2PassageCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,50 +12,46 @@ index_stats: documents (non-empty): 138364197 total terms: 4673266762 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map -l 2 separator: "\t" parse_index: 2 - metric: MAP@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank -l 2 separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@10 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m ndcg_cut.10 separator: "\t" parse_index: 2 - metric: NDCG@10 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 -l 2 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 -l 2 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[DL21 (Passage)](https://microsoft.github.io/msmarco/TREC-Deep-Learning)" id: dl21 @@ -65,14 +61,13 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: MAP@100: - 0.1357 MRR@100: - 0.5060 - NDCG@10: + nDCG@10: - 0.4458 R@100: - 0.3261 @@ -80,15 +75,13 @@ models: - 0.6149 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: MAP@100: - 0.1632 MRR@100: - 0.4925 - NDCG@10: + nDCG@10: - 0.4480 R@100: - 0.3498 @@ -96,17 +89,13 @@ models: - 0.6619 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: MAP@100: - 0.1907 MRR@100: - 0.5733 - NDCG@10: + nDCG@10: - 0.4851 R@100: - 0.3803 @@ -114,15 +103,13 @@ models: - 0.6882 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: MAP@100: - 0.1821 MRR@100: - 0.5532 - NDCG@10: + nDCG@10: - 0.4740 R@100: - 0.3745 diff --git a/src/main/resources/regression/fever.yaml b/src/main/resources/regression/fever.yaml index 20ef82b3e4..c22ab7e4cb 100644 --- a/src/main/resources/regression/fever.yaml +++ b/src/main/resources/regression/fever.yaml @@ -12,39 +12,35 @@ index_stats: documents (non-empty): 5396060 total terms: 322660819 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[FEVER Paper Development Dataset](https://s3-eu-west-1.amazonaws.com/fever.public/paper_dev.jsonl)" + id: dev path: topics.fever.dev.txt qrel: qrels.fever.dev.txt models: - name: bm25-default display: BM25 (Default) - params: - - -bm25 + params: -bm25 results: R@100: - 0.8974 @@ -52,10 +48,7 @@ models: - 0.9477 - name: bm25-tuned display: BM25 (Tuned) - params: - - -bm25 - - -bm25.k1 0.9 - - -bm25.b 0.1 + params: -bm25 -bm25.k1 0.9 -bm25.b 0.1 results: R@100: - 0.8988 diff --git a/src/main/resources/regression/fire12-bn.yaml b/src/main/resources/regression/fire12-bn.yaml index 49a2f9aba9..441d26cd11 100644 --- a/src/main/resources/regression/fire12-bn.yaml +++ b/src/main/resources/regression/fire12-bn.yaml @@ -2,7 +2,7 @@ corpus: fire12-bn corpus_path: collections/fire/bengali/bn.docs.2012.19032012 -index_path: indexes/lucene-index.fire12-bn.pos+docvectors+raw +index_path: indexes/lucene-index.fire12-bn collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,51 +12,46 @@ index_stats: documents (non-empty): 500122 total terms: 143972612 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: -search_options: - - -language bn -topic_reader: Trec -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.20 separator: "\t" parse_index: 2 - metric: p20 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.20 separator: "\t" parse_index: 2 - metric: ndcg20 metric_precision: 4 can_combine: true +topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[FIRE 2012 (Monolingual Bengali)](../src/main/resources/topics-and-qrels/topics.fire12bn.176-225.txt)" + id: fire12 path: topics.fire12bn.176-225.txt qrel: qrels.fire12bn.176-225.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 -language bn results: - map: + MAP: - 0.2881 - p20: + P20: - 0.3740 - ndcg20: + nDCG@20: - 0.4261 diff --git a/src/main/resources/regression/fire12-en.yaml b/src/main/resources/regression/fire12-en.yaml index d06240d9e1..9781eb8a10 100644 --- a/src/main/resources/regression/fire12-en.yaml +++ b/src/main/resources/regression/fire12-en.yaml @@ -2,7 +2,7 @@ corpus: fire12-en corpus_path: collections/fire/english/en.docs.2011 -index_path: indexes/lucene-index.fire12-en.pos+docvectors+raw +index_path: indexes/lucene-index.fire12-en collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,52 +12,46 @@ index_stats: documents (non-empty): 392577 total terms: 115311163 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language en -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.20 separator: "\t" parse_index: 2 - metric: p20 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.20 separator: "\t" parse_index: 2 - metric: ndcg20 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[FIRE 2012 (Monolingual English)](../src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt)" + id: fire12 path: topics.fire12en.176-225.txt qrel: qrels.fire12en.176-225.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 -language en results: - map: + MAP: - 0.3713 - p20: + P20: - 0.4970 - ndcg20: + nDCG@20: - 0.5420 diff --git a/src/main/resources/regression/fire12-hi.yaml b/src/main/resources/regression/fire12-hi.yaml index b7168f0b7d..dffc71ae37 100644 --- a/src/main/resources/regression/fire12-hi.yaml +++ b/src/main/resources/regression/fire12-hi.yaml @@ -2,7 +2,7 @@ corpus: fire12-hi corpus_path: collections/fire/hindi/hi.docs.2011 -index_path: indexes/lucene-index.fire12-hi.pos+docvectors+raw +index_path: indexes/lucene-index.fire12-hi collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,52 +12,46 @@ index_stats: documents (non-empty): 331599 total terms: 57038417 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language hi -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.20 separator: "\t" parse_index: 2 - metric: p20 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.20 separator: "\t" parse_index: 2 - metric: ndcg20 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[FIRE 2012 (Monolingual Hindi)](../src/main/resources/topics-and-qrels/topics.fire12en.176-225.txt)" + id: fire12 path: topics.fire12hi.176-225.txt qrel: qrels.fire12hi.176-225.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 -language hi results: - map: + MAP: - 0.3867 - p20: + P20: - 0.4470 - ndcg20: + nDCG@20: - 0.5310 diff --git a/src/main/resources/regression/gov2.yaml b/src/main/resources/regression/gov2.yaml index 679df193e8..b428035770 100644 --- a/src/main/resources/regression/gov2.yaml +++ b/src/main/resources/regression/gov2.yaml @@ -2,7 +2,7 @@ corpus: gov2 corpus_path: collections/web/gov2/gov2-corpus/ -index_path: indexes/lucene-index.gov2.pos+docvectors+raw +index_path: indexes/lucene-index.gov2 collection: TrecwebCollection threads: 44 generator: DefaultLuceneDocumentGenerator @@ -12,125 +12,109 @@ index_stats: documents (non-empty): 25170665 total terms: 17345663488 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2004 Terabyte Track (Topics 701-750)](../src/main/resources/topics-and-qrels/topics.terabyte04.701-750.txt)" + id: trec2004 path: topics.terabyte04.701-750.txt qrel: qrels.terabyte04.701-750.txt - name: "[TREC 2005 Terabyte Track (Topics 751-800)](../src/main/resources/topics-and-qrels/topics.terabyte05.751-800.txt)" + id: trec2005 path: topics.terabyte05.751-800.txt qrel: qrels.terabyte05.751-800.txt - name: "[TREC 2006 Terabyte Track (Topics 801-850)](../src/main/resources/topics-and-qrels/topics.terabyte06.801-850.txt)" + id: trec2006 path: topics.terabyte06.801-850.txt qrel: qrels.terabyte06.801-850.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2689 - 0.3391 - 0.3081 - p30: + P30: - 0.4864 - 0.5540 - 0.4907 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2844 - 0.3812 - 0.3378 - p30: + P30: - 0.5190 - 0.5913 - 0.5160 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.beta 0.1 - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2730 - 0.3649 - 0.3129 - p30: + P30: - 0.5156 - 0.5873 - 0.5073 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.2681 - 0.3304 - 0.2997 - p30: + P30: - 0.4755 - 0.5340 - 0.4727 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.2709 - 0.3550 - 0.3154 - p30: + P30: - 0.4932 - 0.5567 - 0.4840 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.beta 0.1 - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2678 - 0.3614 - 0.3109 - p30: + P30: - 0.4925 - 0.5867 - 0.4960 diff --git a/src/main/resources/regression/mb11.yaml b/src/main/resources/regression/mb11.yaml index 642c1c4aa8..c46e40710a 100644 --- a/src/main/resources/regression/mb11.yaml +++ b/src/main/resources/regression/mb11.yaml @@ -2,7 +2,7 @@ corpus: mb11 corpus_path: collections/twitter/Tweets2011-corpus/json.gold/ -index_path: indexes/lucene-index.mb11.pos+docvectors+raw +index_path: indexes/lucene-index.mb11 collection: TweetCollection threads: 44 generator: TweetGenerator @@ -12,117 +12,93 @@ index_stats: documents (non-empty): 14950449 total terms: 175050212 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Microblog +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2011 Microblog Track Topics](../src/main/resources/topics-and-qrels/topics.microblog2011.txt)" + id: mb11 path: topics.microblog2011.txt qrel: qrels.microblog2011.txt - name: "[TREC 2012 Microblog Track Topics](../src/main/resources/topics-and-qrels/topics.microblog2012.txt)" + id: mb12 path: topics.microblog2012.txt qrel: qrels.microblog2012.txt models: - name: bm25 display: BM25 - params: - - -searchtweets - - -bm25 + params: -searchtweets -bm25 results: - map: + MAP: - 0.3384 - 0.1948 - p30: + P30: - 0.3959 - 0.3316 - name: bm25+rm3 display: +RM3 - params: - - -searchtweets - - -bm25 - - -rm3 + params: -searchtweets -bm25 -rm3 results: - map: + MAP: - 0.3650 - 0.2193 - p30: + P30: - 0.4170 - 0.3463 - name: bm25+ax display: +Ax - params: - - -searchtweets - - -bm25 - - -axiom - - -axiom.beta 1.0 - - -axiom.deterministic - - -rerankCutoff 20 + params: -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.4008 - 0.2309 - p30: + P30: - 0.4612 - 0.3554 - name: ql display: QL - params: - - -searchtweets - - -qld + params: -searchtweets -qld results: - map: + MAP: - 0.3584 - 0.2102 - p30: + P30: - 0.4061 - 0.3333 - name: ql+rm3 display: +RM3 - params: - - -searchtweets - - -qld - - -rm3 + params: -searchtweets -qld -rm3 results: - map: + MAP: - 0.3923 - 0.2389 - p30: + P30: - 0.4435 - 0.3514 - name: ql+ax display: +Ax - params: - - -searchtweets - - -qld - - -axiom - - -axiom.beta 1.0 - - -axiom.deterministic - - -rerankCutoff 20 + params: -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.4201 - 0.2474 - p30: + P30: - 0.4408 - 0.3842 diff --git a/src/main/resources/regression/mb13.yaml b/src/main/resources/regression/mb13.yaml index 204efeeb7a..b5a21946d0 100644 --- a/src/main/resources/regression/mb13.yaml +++ b/src/main/resources/regression/mb13.yaml @@ -2,7 +2,7 @@ corpus: mb13 corpus_path: collections/twitter/Tweets2013-corpus/data/ -index_path: indexes/lucene-index.mb13.pos+docvectors+raw +index_path: indexes/lucene-index.mb13 collection: TweetCollection threads: 44 generator: TweetGenerator @@ -12,117 +12,93 @@ index_stats: documents (non-empty): 203143249 total terms: 2099082204 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Microblog +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2013 Microblog Track Topics](../src/main/resources/topics-and-qrels/topics.microblog2013.txt)" + id: mb13 path: topics.microblog2013.txt qrel: qrels.microblog2013.txt - name: "[TREC 2014 Microblog Track Topics](../src/main/resources/topics-and-qrels/topics.microblog2014.txt)" + id: mb14 path: topics.microblog2014.txt qrel: qrels.microblog2014.txt models: - name: bm25 display: BM25 - params: - - -searchtweets - - -bm25 + params: -searchtweets -bm25 results: - map: + MAP: - 0.2371 - 0.3931 - p30: + P30: - 0.4339 - 0.6212 - name: bm25+rm3 display: +RM3 - params: - - -searchtweets - - -bm25 - - -rm3 + params: -searchtweets -bm25 -rm3 results: - map: + MAP: - 0.2513 - 0.4374 - p30: + P30: - 0.4411 - 0.6442 - name: bm25+ax display: +Ax - params: - - -searchtweets - - -bm25 - - -axiom - - -axiom.beta 1.0 - - -axiom.deterministic - - -rerankCutoff 20 + params: -searchtweets -bm25 -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2855 - 0.4796 - p30: + P30: - 0.4728 - 0.6648 - name: ql display: QL - params: - - -searchtweets - - -qld + params: -searchtweets -qld results: - map: + MAP: - 0.2602 - 0.4181 - p30: + P30: - 0.4561 - 0.6430 - name: ql+rm3 display: +RM3 - params: - - -searchtweets - - -qld - - -rm3 + params: -searchtweets -qld -rm3 results: - map: + MAP: - 0.2911 - 0.4676 - p30: + P30: - 0.4906 - 0.6533 - name: ql+ax display: +Ax - params: - - -searchtweets - - -qld - - -axiom - - -axiom.beta 1.0 - - -axiom.deterministic - - -rerankCutoff 20 + params: -searchtweets -qld -axiom -axiom.beta 1.0 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.3152 - 0.4965 - p30: + P30: - 0.5078 - 0.6727 diff --git a/src/main/resources/regression/mrtydi-v1.1-ar.yaml b/src/main/resources/regression/mrtydi-v1.1-ar.yaml index 505095f764..47ff635987 100644 --- a/src/main/resources/regression/mrtydi-v1.1-ar.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-ar.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-ar corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-arabic -index_path: indexes/lucene-index.mrtydi-v1.1-arabic.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-arabic collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 2106586 total terms: 92529014 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language ar -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Arabic): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-ar.train.txt.gz qrel: qrels.mrtydi-v1.1-ar.train.txt - name: "[Mr. TyDi (Arabic): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-ar.dev.txt.gz qrel: qrels.mrtydi-v1.1-ar.dev.txt - name: "[Mr. TyDi (Arabic): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-ar.test.txt.gz qrel: qrels.mrtydi-v1.1-ar.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language ar results: MRR@100: - 0.3356 diff --git a/src/main/resources/regression/mrtydi-v1.1-bn.yaml b/src/main/resources/regression/mrtydi-v1.1-bn.yaml index a0d6697327..0746989542 100644 --- a/src/main/resources/regression/mrtydi-v1.1-bn.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-bn.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-bn corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-bengali -index_path: indexes/lucene-index.mrtydi-v1.1-bengali.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-bengali collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 304059 total terms: 15236598 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language bn -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Bengali): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-bn.train.txt.gz qrel: qrels.mrtydi-v1.1-bn.train.txt - name: "[Mr. TyDi (Bengali): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-bn.dev.txt.gz qrel: qrels.mrtydi-v1.1-bn.dev.txt - name: "[Mr. TyDi (Bengali): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-bn.test.txt.gz qrel: qrels.mrtydi-v1.1-bn.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language bn results: MRR@100: - 0.3566 diff --git a/src/main/resources/regression/mrtydi-v1.1-en.yaml b/src/main/resources/regression/mrtydi-v1.1-en.yaml index 4183b52af9..5986480fa2 100644 --- a/src/main/resources/regression/mrtydi-v1.1-en.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-en.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-en corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-english -index_path: indexes/lucene-index.mrtydi-v1.1-english.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-english collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 32907100 total terms: 1507060955 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language en -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (English): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-en.train.txt.gz qrel: qrels.mrtydi-v1.1-en.train.txt - name: "[Mr. TyDi (English): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-en.dev.txt.gz qrel: qrels.mrtydi-v1.1-en.dev.txt - name: "[Mr. TyDi (English): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-en.test.txt.gz qrel: qrels.mrtydi-v1.1-en.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language en results: MRR@100: - 0.1592 diff --git a/src/main/resources/regression/mrtydi-v1.1-fi.yaml b/src/main/resources/regression/mrtydi-v1.1-fi.yaml index d4389a751f..c9bae2bbd8 100644 --- a/src/main/resources/regression/mrtydi-v1.1-fi.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-fi.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-fi corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-finnish -index_path: indexes/lucene-index.mrtydi-v1.1-finnish.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-finnish collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 1908757 total terms: 69431615 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language fi -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Finnish): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-fi.train.txt.gz qrel: qrels.mrtydi-v1.1-fi.train.txt - name: "[Mr. TyDi (Finnish): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-fi.dev.txt.gz qrel: qrels.mrtydi-v1.1-fi.dev.txt - name: "[Mr. TyDi (Finnish): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-fi.test.txt.gz qrel: qrels.mrtydi-v1.1-fi.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language fi results: MRR@100: - 0.4101 diff --git a/src/main/resources/regression/mrtydi-v1.1-id.yaml b/src/main/resources/regression/mrtydi-v1.1-id.yaml index e5e40332fb..78abd22b9c 100644 --- a/src/main/resources/regression/mrtydi-v1.1-id.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-id.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-id corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-indonesian -index_path: indexes/lucene-index.mrtydi-v1.1-indonesian.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-indonesian collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 1469399 total terms: 52493134 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language id -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Indonesian): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-id.train.txt.gz qrel: qrels.mrtydi-v1.1-id.train.txt - name: "[Mr. TyDi (Indonesian): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-id.dev.txt.gz qrel: qrels.mrtydi-v1.1-id.dev.txt - name: "[Mr. TyDi (Indonesian): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-id.test.txt.gz qrel: qrels.mrtydi-v1.1-id.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language id results: MRR@100: - 0.2972 diff --git a/src/main/resources/regression/mrtydi-v1.1-ja.yaml b/src/main/resources/regression/mrtydi-v1.1-ja.yaml index 08758f96a2..7c577b7b27 100644 --- a/src/main/resources/regression/mrtydi-v1.1-ja.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-ja.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-ja corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-japanese -index_path: indexes/lucene-index.mrtydi-v1.1-japanese.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-japanese collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 7000027 total terms: 303640353 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language ja -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Japanese): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-ja.train.txt.gz qrel: qrels.mrtydi-v1.1-ja.train.txt - name: "[Mr. TyDi (Japanese): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-ja.dev.txt.gz qrel: qrels.mrtydi-v1.1-ja.dev.txt - name: "[Mr. TyDi (Japanese): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-ja.test.txt.gz qrel: qrels.mrtydi-v1.1-ja.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language ja results: MRR@100: - 0.2236 diff --git a/src/main/resources/regression/mrtydi-v1.1-ko.yaml b/src/main/resources/regression/mrtydi-v1.1-ko.yaml index fc9f3e56af..8a99abe0c2 100644 --- a/src/main/resources/regression/mrtydi-v1.1-ko.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-ko.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-ko corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-korean -index_path: indexes/lucene-index.mrtydi-v1.1-korean.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-korean collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 1496126 total terms: 122217290 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language ko -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Korean): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-ko.train.txt.gz qrel: qrels.mrtydi-v1.1-ko.train.txt - name: "[Mr. TyDi (Korean): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-ko.dev.txt.gz qrel: qrels.mrtydi-v1.1-ko.dev.txt - name: "[Mr. TyDi (Korean): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-ko.test.txt.gz qrel: qrels.mrtydi-v1.1-ko.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language ko results: MRR@100: - 0.2596 diff --git a/src/main/resources/regression/mrtydi-v1.1-ru.yaml b/src/main/resources/regression/mrtydi-v1.1-ru.yaml index c26b83140b..93b88fb3b1 100644 --- a/src/main/resources/regression/mrtydi-v1.1-ru.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-ru.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-ru corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-russian -index_path: indexes/lucene-index.mrtydi-v1.1-russian.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-russian collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 9597504 total terms: 346329152 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language ru -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Russian): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-ru.train.txt.gz qrel: qrels.mrtydi-v1.1-ru.train.txt - name: "[Mr. TyDi (Russian): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-ru.dev.txt.gz qrel: qrels.mrtydi-v1.1-ru.dev.txt - name: "[Mr. TyDi (Russian): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-ru.test.txt.gz qrel: qrels.mrtydi-v1.1-ru.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language ru results: MRR@100: - 0.2205 diff --git a/src/main/resources/regression/mrtydi-v1.1-sw.yaml b/src/main/resources/regression/mrtydi-v1.1-sw.yaml index 85dee502bc..089feba19b 100644 --- a/src/main/resources/regression/mrtydi-v1.1-sw.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-sw.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-sw corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-swahili -index_path: indexes/lucene-index.mrtydi-v1.1-swahili.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-swahili collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 136689 total terms: 4937051 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -pretokenized -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Swahili): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-sw.train.txt.gz qrel: qrels.mrtydi-v1.1-sw.train.txt - name: "[Mr. TyDi (Swahili): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-sw.dev.txt.gz qrel: qrels.mrtydi-v1.1-sw.dev.txt - name: "[Mr. TyDi (Swahili): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-sw.test.txt.gz qrel: qrels.mrtydi-v1.1-sw.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -pretokenized results: MRR@100: - 0.2610 diff --git a/src/main/resources/regression/mrtydi-v1.1-te.yaml b/src/main/resources/regression/mrtydi-v1.1-te.yaml index 5a74ced809..7c106424c5 100644 --- a/src/main/resources/regression/mrtydi-v1.1-te.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-te.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-te corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-telugu -index_path: indexes/lucene-index.mrtydi-v1.1-telugu.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-telugu collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 548224 total terms: 27173644 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -pretokenized -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Telugu): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-te.train.txt.gz qrel: qrels.mrtydi-v1.1-te.train.txt - name: "[Mr. TyDi (Telugu): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-te.dev.txt.gz qrel: qrels.mrtydi-v1.1-te.dev.txt - name: "[Mr. TyDi (Telugu): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-te.test.txt.gz qrel: qrels.mrtydi-v1.1-te.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -pretokenized results: MRR@100: - 0.2847 diff --git a/src/main/resources/regression/mrtydi-v1.1-th.yaml b/src/main/resources/regression/mrtydi-v1.1-th.yaml index 453c04fa80..28e1b661e5 100644 --- a/src/main/resources/regression/mrtydi-v1.1-th.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-th.yaml @@ -2,7 +2,7 @@ corpus: mrtydi-v1.1-th corpus_path: collections/mr-tydi-corpus/mrtydi-v1.1-thai -index_path: indexes/lucene-index.mrtydi-v1.1-thai.pos+docvectors+raw +index_path: indexes/lucene-index.mrtydi-v1.1-thai collection: MrTyDiCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,47 +12,43 @@ index_stats: documents (non-empty): 568855 total terms: 31550936 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language th -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Mr. TyDi (Thai): train](https://github.com/castorini/mr.tydi)" + id: train path: topics.mrtydi-v1.1-th.train.txt.gz qrel: qrels.mrtydi-v1.1-th.train.txt - name: "[Mr. TyDi (Thai): dev](https://github.com/castorini/mr.tydi)" + id: dev path: topics.mrtydi-v1.1-th.dev.txt.gz qrel: qrels.mrtydi-v1.1-th.dev.txt - name: "[Mr. TyDi (Thai): test](https://github.com/castorini/mr.tydi)" + id: test path: topics.mrtydi-v1.1-th.test.txt.gz qrel: qrels.mrtydi-v1.1-th.test.txt models: - name: bm25 display: BM25 - params: - - -bm25 -hits 100 + params: -bm25 -hits 100 -language th results: MRR@100: - 0.3543 diff --git a/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-doc.yaml b/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-doc.yaml index adf2285406..e593ef6124 100644 --- a/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-doc.yaml +++ b/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-doc corpus_path: collections/msmarco/doc-docTTTTTquery-per-doc -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-doc collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 3213834 total terms: 3748332076 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2886 R@100: - 0.7990 @@ -62,12 +57,9 @@ models: - 0.9259 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 4.68 - - -bm25.b 0.87 + params: -bm25 -bm25.k1 4.68 -bm25.b 0.87 results: - map: + MAP: - 0.3270 R@100: - 0.8608 diff --git a/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage-v3.yaml b/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage-v3.yaml index 719d83843e..0c2d875749 100644 --- a/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage-v3.yaml +++ b/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage-v3.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-passage-v3 corpus_path: collections/msmarco/doc-docTTTTTquery-per-passage-v3 -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage-v3 collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20545677 total terms: 4206639543 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.3184 R@100: - 0.8479 @@ -62,13 +57,9 @@ models: - 0.9490 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.56 - - -bm25.b 0.59 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.3213 R@100: - 0.8627 diff --git a/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage.yaml b/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage.yaml index df6f5c50f2..6f38a65940 100644 --- a/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage.yaml +++ b/src/main/resources/regression/msmarco-doc-docTTTTTquery-per-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-docTTTTTquery-per-passage corpus_path: collections/msmarco/doc-docTTTTTquery-per-passage -index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-docTTTTTquery-per-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20544550 total terms: 4203956960 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.3182 R@100: - 0.8481 @@ -62,13 +57,9 @@ models: - 0.9490 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.56 - - -bm25.b 0.59 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.56 -bm25.b 0.59 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.3211 R@100: - 0.8627 diff --git a/src/main/resources/regression/msmarco-doc-per-passage-v2.yaml b/src/main/resources/regression/msmarco-doc-per-passage-v2.yaml index f4b3740af4..6c9a510c1c 100644 --- a/src/main/resources/regression/msmarco-doc-per-passage-v2.yaml +++ b/src/main/resources/regression/msmarco-doc-per-passage-v2.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-per-passage-v2 corpus_path: collections/msmarco/doc-per-passage-v2/ -index_path: indexes/lucene-index.msmarco-doc-per-passage-v2.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-per-passage-v2 collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20545612 total terms: 3056059952 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2609 R@100: - 0.7737 @@ -62,12 +57,9 @@ models: - 0.9095 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2324 R@100: - 0.7768 @@ -75,14 +67,9 @@ models: - 0.9266 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2170 R@100: - 0.7578 @@ -90,12 +77,9 @@ models: - 0.9207 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2189 R@100: - 0.7570 @@ -103,13 +87,9 @@ models: - 0.9135 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2639 R@100: - 0.7884 @@ -117,14 +97,9 @@ models: - 0.9222 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2342 R@100: - 0.7793 @@ -132,16 +107,9 @@ models: - 0.9239 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2250 R@100: - 0.7730 @@ -149,14 +117,9 @@ models: - 0.9268 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2184 R@100: - 0.7520 diff --git a/src/main/resources/regression/msmarco-doc-per-passage-v3.yaml b/src/main/resources/regression/msmarco-doc-per-passage-v3.yaml index a7801897ee..c598a1dacd 100644 --- a/src/main/resources/regression/msmarco-doc-per-passage-v3.yaml +++ b/src/main/resources/regression/msmarco-doc-per-passage-v3.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-per-passage-v3 corpus_path: collections/msmarco/doc-per-passage-v3/ -index_path: indexes/lucene-index.msmarco-doc-per-passage-v3.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-per-passage-v3 collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20545677 total terms: 3200515914 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2690 R@100: - 0.7847 @@ -62,12 +57,9 @@ models: - 0.9178 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2419 R@100: - 0.7882 @@ -75,14 +67,9 @@ models: - 0.9355 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2208 R@100: - 0.7710 @@ -90,12 +77,9 @@ models: - 0.9264 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2325 R@100: - 0.7722 @@ -103,13 +87,9 @@ models: - 0.9185 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2762 R@100: - 0.8013 @@ -117,14 +97,9 @@ models: - 0.9311 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2450 R@100: - 0.7961 @@ -132,16 +107,9 @@ models: - 0.9363 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2330 R@100: - 0.7888 @@ -149,14 +117,9 @@ models: - 0.9353 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2276 R@100: - 0.7687 diff --git a/src/main/resources/regression/msmarco-doc-per-passage.yaml b/src/main/resources/regression/msmarco-doc-per-passage.yaml index debf02d5a1..c1c7466074 100644 --- a/src/main/resources/regression/msmarco-doc-per-passage.yaml +++ b/src/main/resources/regression/msmarco-doc-per-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc-per-passage corpus_path: collections/msmarco/doc-per-passage/ -index_path: indexes/lucene-index.msmarco-doc-per-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc-per-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 20544550 total terms: 3197886407 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2688 R@100: - 0.7849 @@ -62,12 +57,9 @@ models: - 0.9180 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2416 R@100: - 0.7876 @@ -75,14 +67,9 @@ models: - 0.9355 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2229 R@100: - 0.7703 @@ -90,12 +77,9 @@ models: - 0.9266 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2325 R@100: - 0.7714 @@ -103,13 +87,9 @@ models: - 0.9187 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2756 R@100: - 0.8009 @@ -117,14 +97,9 @@ models: - 0.9311 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2443 R@100: - 0.7955 @@ -132,16 +107,9 @@ models: - 0.9359 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2350 R@100: - 0.7909 @@ -149,14 +117,9 @@ models: - 0.9341 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 2.16 - - -bm25.b 0.61 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25.k1 2.16 -bm25.b 0.61 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - map: + MAP: - 0.2271 R@100: - 0.7685 diff --git a/src/main/resources/regression/msmarco-doc.yaml b/src/main/resources/regression/msmarco-doc.yaml index df731b5bcb..591ad0bfbc 100644 --- a/src/main/resources/regression/msmarco-doc.yaml +++ b/src/main/resources/regression/msmarco-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-doc corpus_path: collections/msmarco/doc/ -index_path: indexes/lucene-index.msmarco-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-doc collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 1 @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 3213835 total terms: 2748636047 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev @@ -51,10 +47,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2310 R@100: - 0.7279 @@ -62,11 +57,9 @@ models: - 0.8856 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.1632 R@100: - 0.6765 @@ -74,12 +67,9 @@ models: - 0.8785 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 results: - map: + MAP: - 0.2788 R@100: - 0.8065 @@ -87,13 +77,9 @@ models: - 0.9326 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 3.44 - - -bm25.b 0.87 - - -rm3 + params: -bm25 -bm25.k1 3.44 -bm25.b 0.87 -rm3 results: - map: + MAP: - 0.2289 R@100: - 0.7872 @@ -101,12 +87,9 @@ models: - 0.9320 - name: bm25-tuned2 display: BM25 (tuned2) - params: - - -bm25 - - -bm25.k1 4.46 - - -bm25.b 0.82 + params: -bm25 -bm25.k1 4.46 -bm25.b 0.82 results: - map: + MAP: - 0.2775 R@100: - 0.8076 @@ -114,13 +97,9 @@ models: - 0.9357 - name: bm25-tuned2+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 4.46 - - -bm25.b 0.82 - - -rm3 + params: -bm25 -bm25.k1 4.46 -bm25.b 0.82 -rm3 results: - map: + MAP: - 0.2238 R@100: - 0.7789 diff --git a/src/main/resources/regression/msmarco-passage-deepimpact.yaml b/src/main/resources/regression/msmarco-passage-deepimpact.yaml index cda8976b8c..c7a6a832ba 100644 --- a/src/main/resources/regression/msmarco-passage-deepimpact.yaml +++ b/src/main/resources/regression/msmarco-passage-deepimpact.yaml @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 8841823 total terms: 35455908214 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: mrr metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -51,12 +47,11 @@ topics: models: - name: deepimpact display: DeepImpact - params: - - -impact -pretokenized + params: -impact -pretokenized results: - map: + MAP: - 0.3334 - mrr: + MRR: - 0.3386 R@1000: - 0.9476 diff --git a/src/main/resources/regression/msmarco-passage-distill-splade-max.yaml b/src/main/resources/regression/msmarco-passage-distill-splade-max.yaml index 918b3c16eb..ee841945c6 100644 --- a/src/main/resources/regression/msmarco-passage-distill-splade-max.yaml +++ b/src/main/resources/regression/msmarco-passage-distill-splade-max.yaml @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 8841823 total terms: 95445422483 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: mrr metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -51,12 +47,11 @@ topics: models: - name: distill-splade-max display: DistilSPLADE-max - params: - - -impact -pretokenized + params: -impact -pretokenized results: - map: + MAP: - 0.3746 - mrr: + MRR: - 0.3798 R@1000: - 0.9787 diff --git a/src/main/resources/regression/msmarco-passage-doc2query.yaml b/src/main/resources/regression/msmarco-passage-doc2query.yaml index 508d3c2a02..ba58b4dcbc 100644 --- a/src/main/resources/regression/msmarco-passage-doc2query.yaml +++ b/src/main/resources/regression/msmarco-passage-doc2query.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage-doc2query corpus_path: collections/msmarco/passage-expanded-topk10 -index_path: indexes/lucene-index.msmarco-passage-doc2query.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage-doc2query collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,29 +12,25 @@ index_stats: documents (non-empty): 8841823 total terms: 739691803 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -44,43 +40,33 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2270 R@1000: - 0.8900 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2028 R@1000: - 0.8916 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.2293 R@1000: - 0.8911 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.2077 R@1000: - 0.8957 diff --git a/src/main/resources/regression/msmarco-passage-docTTTTTquery.yaml b/src/main/resources/regression/msmarco-passage-docTTTTTquery.yaml index 641f8594cb..4297aaa4b9 100644 --- a/src/main/resources/regression/msmarco-passage-docTTTTTquery.yaml +++ b/src/main/resources/regression/msmarco-passage-docTTTTTquery.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage-docTTTTTquery corpus_path: collections/msmarco/passage-docTTTTTquery -index_path: indexes/lucene-index.msmarco-passage-docTTTTTquery.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage-docTTTTTquery collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,29 +12,25 @@ index_stats: documents (non-empty): 8841823 total terms: 1986612263 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -44,10 +40,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2805 R@1000: - 0.9470 @@ -55,57 +50,41 @@ models: - 0.2767 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2243 R@1000: - 0.9463 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.2850 R@1000: - 0.9471 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.2266 R@1000: - 0.9479 - name: bm25-tuned2 display: BM25 (tuned2) - params: - - -bm25 - - -bm25.k1 2.18 - - -bm25.b 0.86 + params: -bm25 -bm25.k1 2.18 -bm25.b 0.86 results: - map: + MAP: - 0.2893 R@1000: - 0.9506 - name: bm25-tuned2+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 2.18 - - -bm25.b 0.86 - - -rm3 + params: -bm25 -bm25.k1 2.18 -bm25.b 0.86 -rm3 results: - map: + MAP: - 0.2464 R@1000: - 0.9528 \ No newline at end of file diff --git a/src/main/resources/regression/msmarco-passage-unicoil-tilde-expansion.yaml b/src/main/resources/regression/msmarco-passage-unicoil-tilde-expansion.yaml index 0f8b77b996..18b419789c 100644 --- a/src/main/resources/regression/msmarco-passage-unicoil-tilde-expansion.yaml +++ b/src/main/resources/regression/msmarco-passage-unicoil-tilde-expansion.yaml @@ -3,45 +3,41 @@ corpus: msmarco-passage-unicoil-tilde-expansion corpus_path: collections/msmarco/msmarco-passage-unicoil-tilde-expansion-b8/ index_path: indexes/lucene-index.msmarco-passage-unicoil-tilde-expansion -index_stats: - documents: 8841823 - documents (non-empty): 8841823 - total terms: 73040108576 - -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: collection: JsonVectorCollection generator: DefaultLuceneDocumentGenerator threads: 16 index_options: -impact -pretokenized +index_stats: + documents: 8841823 + documents (non-empty): 8841823 + total terms: 73040108576 -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: mrr metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -51,12 +47,11 @@ topics: models: - name: unicoil-tilde-expansion display: uniCOIL w/ TILDE expansion - params: - - -impact -pretokenized + params: -impact -pretokenized results: - map: + MAP: - 0.3561 - mrr: + MRR: - 0.3607 R@1000: - 0.9646 diff --git a/src/main/resources/regression/msmarco-passage-unicoil.yaml b/src/main/resources/regression/msmarco-passage-unicoil.yaml index 906a1662c8..bd95dd9692 100644 --- a/src/main/resources/regression/msmarco-passage-unicoil.yaml +++ b/src/main/resources/regression/msmarco-passage-unicoil.yaml @@ -12,36 +12,32 @@ index_stats: documents (non-empty): 8841823 total terms: 44495093768 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recip_rank separator: "\t" parse_index: 2 - metric: mrr metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -51,12 +47,11 @@ topics: models: - name: unicoil display: uniCOIL w/ doc2query-T5 expansion - params: - - -impact -pretokenized + params: -impact -pretokenized results: - map: + MAP: - 0.3574 - mrr: + MRR: - 0.3625 R@1000: - 0.9582 diff --git a/src/main/resources/regression/msmarco-passage.yaml b/src/main/resources/regression/msmarco-passage.yaml index 5eeecf4ecd..60ab1aef81 100644 --- a/src/main/resources/regression/msmarco-passage.yaml +++ b/src/main/resources/regression/msmarco-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-passage corpus_path: collections/msmarco/passage/ -index_path: indexes/lucene-index.msmarco-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-passage collection: JsonCollection generator: DefaultLuceneDocumentGenerator threads: 9 @@ -12,29 +12,25 @@ index_stats: documents (non-empty): 8841823 total terms: 352316036 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: true topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" id: dev @@ -44,91 +40,65 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1926 R@1000: - 0.8526 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.1661 R@1000: - 0.8606 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1625 R@1000: - 0.8747 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: - map: + MAP: - 0.1520 R@1000: - 0.8537 - name: bm25-tuned display: BM25 (tuned) - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 results: - map: + MAP: - 0.1958 R@1000: - 0.8573 - name: bm25-tuned+rm3 display: +RM3 - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -rm3 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -rm3 results: - map: + MAP: - 0.1762 R@1000: - 0.8687 - name: bm25-tuned+ax display: +Ax - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.1699 R@1000: - 0.8809 - name: bm25-tuned+prf display: +PRF - params: - - -bm25 - - -bm25.k1 0.82 - - -bm25.b 0.68 - - -bm25prf + params: -bm25 -bm25.k1 0.82 -bm25.b 0.68 -bm25prf results: - map: + MAP: - 0.1582 R@1000: - 0.8561 diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml index 6e88aa5a73..2c1d7b8dd7 100644 --- a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 124131404 total terms: 805830282591 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" id: dev @@ -62,12 +58,9 @@ topics: models: - name: unicoil-noexp-0shot display: uniCOIL (no expansion, zero-shot) - params: - - -impact - - -pretokenized - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - MAP: + MAP@100: - 0.2050 - 0.2082 MRR@100: diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented.yaml index c4b22510c0..7972fbcf9c 100644 --- a/src/main/resources/regression/msmarco-v2-doc-segmented.yaml +++ b/src/main/resources/regression/msmarco-v2-doc-segmented.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-doc-segmented corpus_path: collections/msmarco/msmarco_v2_doc_segmented -index_path: indexes/lucene-index.msmarco-v2-doc-segmented.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-doc-segmented collection: MsMarcoV2DocCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 124131414 total terms: 24780915974 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" id: dev @@ -62,11 +58,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - MAP: + MAP@100: - 0.1875 - 0.1903 MRR@100: @@ -80,12 +74,9 @@ models: - 0.8549 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - MAP: + MAP@100: - 0.1648 - 0.1679 MRR@100: @@ -99,14 +90,9 @@ models: - 0.8633 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - MAP: + MAP@100: - 0.1344 - 0.1345 MRR@100: @@ -120,12 +106,9 @@ models: - 0.8210 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf - - -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + params: -bm25 -bm25prf -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: - MAP: + MAP@100: - 0.1528 - 0.1565 MRR@100: diff --git a/src/main/resources/regression/msmarco-v2-doc.yaml b/src/main/resources/regression/msmarco-v2-doc.yaml index 76c8d6dea2..ab9afc527e 100644 --- a/src/main/resources/regression/msmarco-v2-doc.yaml +++ b/src/main/resources/regression/msmarco-v2-doc.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-doc corpus_path: collections/msmarco/msmarco_v2_doc -index_path: indexes/lucene-index.msmarco-v2-doc.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-doc collection: MsMarcoV2DocCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 11959635 total terms: 14165661202 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" id: dev @@ -62,10 +58,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - MAP: + MAP@100: - 0.1552 - 0.1639 MRR@100: @@ -79,11 +74,9 @@ models: - 0.8029 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - MAP: + MAP@100: - 0.0966 - 0.1011 MRR@100: @@ -97,13 +90,9 @@ models: - 0.7736 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - MAP: + MAP@100: - 0.0665 - 0.0722 MRR@100: @@ -117,11 +106,9 @@ models: - 0.6960 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: - MAP: + MAP@100: - 0.0834 - 0.0876 MRR@100: diff --git a/src/main/resources/regression/msmarco-v2-passage-augmented.yaml b/src/main/resources/regression/msmarco-v2-passage-augmented.yaml index 6bfc6d6b37..c433dcf4ea 100644 --- a/src/main/resources/regression/msmarco-v2-passage-augmented.yaml +++ b/src/main/resources/regression/msmarco-v2-passage-augmented.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-passage-augmented corpus_path: collections/msmarco/msmarco_v2_passage_augmented -index_path: indexes/lucene-index.msmarco-v2-passage-augmented.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-passage-augmented collection: MsMarcoV2PassageCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 138364198 total terms: 15272964956 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" id: dev @@ -62,10 +58,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - MAP: + MAP@100: - 0.0863 - 0.0904 MRR@100: @@ -79,11 +74,9 @@ models: - 0.6933 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - MAP: + MAP@100: - 0.0669 - 0.0692 MRR@100: @@ -97,13 +90,9 @@ models: - 0.6838 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - MAP: + MAP@100: - 0.0431 - 0.0442 MRR@100: @@ -117,11 +106,9 @@ models: - 0.6043 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: - MAP: + MAP@100: - 0.0542 - 0.0563 MRR@100: diff --git a/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml b/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml index bdd4b127a8..b9652af959 100644 --- a/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 138364198 total terms: 411330032512 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" id: dev @@ -62,11 +58,9 @@ topics: models: - name: unicoil-noexp-0shot display: uniCOIL (no expansion, zero-shot) - params: - - -impact - - -pretokenized + params: -impact -pretokenized results: - MAP: + MAP@100: - 0.1333 - 0.1374 MRR@100: diff --git a/src/main/resources/regression/msmarco-v2-passage.yaml b/src/main/resources/regression/msmarco-v2-passage.yaml index 7e80e711fd..df9eb64a0b 100644 --- a/src/main/resources/regression/msmarco-v2-passage.yaml +++ b/src/main/resources/regression/msmarco-v2-passage.yaml @@ -2,7 +2,7 @@ corpus: msmarco-v2-passage corpus_path: collections/msmarco/msmarco_v2_passage -index_path: indexes/lucene-index.msmarco-v2-passage.pos+docvectors+raw +index_path: indexes/lucene-index.msmarco-v2-passage collection: MsMarcoV2PassageCollection generator: DefaultLuceneDocumentGenerator threads: 18 @@ -12,43 +12,39 @@ index_stats: documents (non-empty): 138364197 total terms: 4673266762 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m map separator: "\t" parse_index: 2 - metric: MAP metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: MRR@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -M 100 -m recip_rank separator: "\t" parse_index: 2 - metric: MRR@100 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@100 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.100 separator: "\t" parse_index: 2 - metric: R@100 metric_precision: 4 can_combine: false - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: R@1000 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -c -m recall.1000 separator: "\t" parse_index: 2 - metric: R@1000 metric_precision: 4 can_combine: false topic_reader: TsvInt +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO V2 Passage: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" id: dev @@ -62,10 +58,9 @@ topics: models: - name: bm25-default display: BM25 (default) - params: - - -bm25 + params: -bm25 results: - MAP: + MAP@100: - 0.0709 - 0.0794 MRR@100: @@ -79,11 +74,9 @@ models: - 0.5839 - name: bm25-default+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - MAP: + MAP@100: - 0.0611 - 0.0647 MRR@100: @@ -97,13 +90,9 @@ models: - 0.6049 - name: bm25-default+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - MAP: + MAP@100: - 0.0592 - 0.0642 MRR@100: @@ -117,11 +106,9 @@ models: - 0.6254 - name: bm25-default+prf display: +PRF - params: - - -bm25 - - -bm25prf + params: -bm25 -bm25prf results: - MAP: + MAP@100: - 0.0595 - 0.0632 MRR@100: diff --git a/src/main/resources/regression/ntcir8-zh.yaml b/src/main/resources/regression/ntcir8-zh.yaml index df45c71c64..f1ac57d19d 100644 --- a/src/main/resources/regression/ntcir8-zh.yaml +++ b/src/main/resources/regression/ntcir8-zh.yaml @@ -2,7 +2,7 @@ corpus: ntcir8-zh corpus_path: collections/newswire/clir/ntcir.zh/ntcir8-zh/ -index_path: indexes/lucene-index.ntcir8-zh.pos+docvectors+raw +index_path: indexes/lucene-index.ntcir8-zh collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -13,52 +13,46 @@ index_stats: documents (non-empty): 308832 total terms: 134618634 # on Mac, I seem to get this value instead: 134618027 (no idea why) -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language zh -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.20 separator: "\t" parse_index: 2 - metric: p20 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.20 separator: "\t" parse_index: 2 - metric: ndcg20 metric_precision: 4 can_combine: true topic_reader: TsvString +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[NTCIR-8 ACLIA (IR4QA subtask, Monolingual Chinese)](../src/main/resources/topics-and-qrels/topics.ntcir8zh.eval.txt)" + id: ntcir8 path: topics.ntcir8zh.eval.txt qrel: qrels.ntcir8.eval.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 -language zh results: - map: + MAP: - 0.4014 - p20: + P20: - 0.3849 - ndcg20: + nDCG@20: - 0.4757 diff --git a/src/main/resources/regression/robust05.yaml b/src/main/resources/regression/robust05.yaml index c19c769a03..ff14b7b42c 100644 --- a/src/main/resources/regression/robust05.yaml +++ b/src/main/resources/regression/robust05.yaml @@ -2,7 +2,7 @@ corpus: robust05 corpus_path: collections/newswire/AQUAINT/ -index_path: indexes/lucene-index.robust05.pos+docvectors+raw +index_path: indexes/lucene-index.robust05 collection: TrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,94 +12,77 @@ index_stats: documents (non-empty): 1031326 total terms: 317246296 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2005 Robust Track Topics](../src/main/resources/topics-and-qrels/topics.robust05.txt)" + id: trec05 path: topics.robust05.txt qrel: qrels.robust05.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.2032 - p30: + P30: - 0.3693 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2602 - p30: + P30: - 0.4187 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2587 - p30: + P30: - 0.4120 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.2028 - p30: + P30: - 0.3653 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.2491 - p30: + P30: - 0.4067 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2476 - p30: + P30: - 0.4113 diff --git a/src/main/resources/regression/trec02-ar.yaml b/src/main/resources/regression/trec02-ar.yaml index b58fa88cea..db60f7ab9c 100644 --- a/src/main/resources/regression/trec02-ar.yaml +++ b/src/main/resources/regression/trec02-ar.yaml @@ -2,7 +2,7 @@ corpus: trec02-ar corpus_path: collections/newswire/clir/trec.ar/arabic_newswire_a_ldc2001t55/transcripts -index_path: indexes/lucene-index.trec02-ar.pos+docvectors+raw +index_path: indexes/lucene-index.trec02-ar collection: CleanTrecCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,52 +12,46 @@ index_stats: documents (non-empty): 383872 total terms: 62378088 -search_command: target/appassembler/bin/SearchCollection -search_options: - - -language ar -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.20 separator: "\t" parse_index: 2 - metric: p20 metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: nDCG@20 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m ndcg_cut.20 separator: "\t" parse_index: 2 - metric: ndcg20 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[TREC 2002 (Monolingual Arabic)](../src/main/resources/topics-and-qrels/topics.trec02ar-ar.txt)" + id: trec02 path: topics.trec02ar-ar.txt qrel: qrels.trec02ar.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 -language ar results: - map: + MAP: - 0.2932 - p20: + P20: - 0.3610 - ndcg20: + nDCG@20: - 0.4056 diff --git a/src/main/resources/regression/wt10g.yaml b/src/main/resources/regression/wt10g.yaml index d28fb57fd4..6c4d2345b6 100644 --- a/src/main/resources/regression/wt10g.yaml +++ b/src/main/resources/regression/wt10g.yaml @@ -2,7 +2,7 @@ corpus: wt10g corpus_path: collections/web/wt10g/ -index_path: indexes/lucene-index.wt10g.pos+docvectors+raw +index_path: indexes/lucene-index.wt10g collection: TrecwebCollection generator: DefaultLuceneDocumentGenerator threads: 16 @@ -12,96 +12,77 @@ index_stats: documents (non-empty): 1688299 total terms: 752785964 -search_command: target/appassembler/bin/SearchCollection -topic_root: src/main/resources/topics-and-qrels/ -qrels_root: src/main/resources/topics-and-qrels/ -index_root: -ranking_root: - -evals: - - command: tools/eval/trec_eval.9.0.4/trec_eval +metrics: + - metric: MAP + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m map separator: "\t" parse_index: 2 - metric: map metric_precision: 4 can_combine: true - - command: tools/eval/trec_eval.9.0.4/trec_eval + - metric: P30 + command: tools/eval/trec_eval.9.0.4/trec_eval params: -m P.30 separator: "\t" parse_index: 2 - metric: p30 metric_precision: 4 can_combine: true topic_reader: Trec +topic_root: src/main/resources/topics-and-qrels/ +qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[Wt10g (Topics 451-550)](../src/main/resources/topics-and-qrels/topics.adhoc.451-550.txt)" + id: wt10g path: topics.adhoc.451-550.txt qrel: qrels.adhoc.451-550.txt models: - name: bm25 display: BM25 - params: - - -bm25 + params: -bm25 results: - map: + MAP: - 0.1991 - p30: + P30: - 0.2211 - name: bm25+rm3 display: +RM3 - params: - - -bm25 - - -rm3 + params: -bm25 -rm3 results: - map: + MAP: - 0.2270 - p30: + P30: - 0.2401 - name: bm25+ax display: +Ax - params: - - -bm25 - - -axiom - - -axiom.beta 0.1 - - -axiom.deterministic - - -rerankCutoff 20 + params: -bm25 -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2196 - p30: + P30: - 0.2466 - name: ql display: QL - params: - - -qld + params: -qld results: - map: + MAP: - 0.2021 - p30: + P30: - 0.2180 - name: ql+rm3 display: +RM3 - params: - - -qld - - -rm3 + params: -qld -rm3 results: - map: + MAP: - 0.2188 - p30: + P30: - 0.2306 - name: ql+ax display: +Ax - params: - - -qld - - -axiom - - -axiom.beta 0.1 - - -axiom.deterministic - - -rerankCutoff 20 + params: -qld -axiom -axiom.beta 0.1 -axiom.deterministic -rerankCutoff 20 results: - map: + MAP: - 0.2268 - p30: + P30: - 0.2469 diff --git a/src/test/java/io/anserini/doc/DataModel.java b/src/test/java/io/anserini/doc/DataModel.java index 7f3cc735d7..f7001bef32 100755 --- a/src/test/java/io/anserini/doc/DataModel.java +++ b/src/test/java/io/anserini/doc/DataModel.java @@ -26,27 +26,11 @@ public class DataModel { private static final String INDEX_COMMAND = "target/appassembler/bin/IndexCollection"; + private static final String SEARCH_COMMAND = "target/appassembler/bin/SearchCollection"; private String corpus; private String corpus_path; - private String search_command; - private String topic_root; - private String qrels_root; - private String index_root; - private String ranking_root; - private String collection; - private String generator; - private int threads; - private String topic_reader; - private String index_path; - private String index_options; - private List search_options; - private Map index_stats; - private List models; - private List topics; - private List evals; - public String getCorpus() { return corpus; } @@ -63,116 +47,115 @@ public void setCorpus_path(String corpus_path) { this.corpus_path = corpus_path; } - public Map getIndex_stats() { - return index_stats; - } + private String index_path; + private String collection; + private String generator; + private int threads; + private String index_options; + private Map index_stats; - public void setIndex_stats(Map index_stats) { - this.index_stats = index_stats; + public String getIndex_path() { + return index_path; } - public List getEvals() { - return evals; + public void setIndex_path(String index_path) { + this.index_path = index_path; } - public void setEvals(List evals) { - this.evals = evals; + public String getCollection() { + return collection; } - public List getTopics() { - return topics; + public void setCollection(String collection) { + this.collection = collection; } - public void setTopics(List topics) { - this.topics = topics; + public String getGenerator() { + return generator; } - public List getModels() { - return models; + public void setGenerator(String generator) { + this.generator = generator; } - public void setModels(List models) { - this.models = models; + public int getThreads() { + return threads; } - public String getSearch_command() { - return search_command; + public void setThreads(int threads) { + this.threads = threads; } - public void setSearch_command(String search_command) { - this.search_command = search_command; + public String getIndex_options() { + return index_options; } - public String getTopic_root() { - return topic_root; + public void setIndex_options(String index_options) { + this.index_options = index_options; } - public void setTopic_root(String topic_root) { - this.topic_root = topic_root; + public Map getIndex_stats() { + return index_stats; } - public String getQrels_root() { - return qrels_root; + public void setIndex_stats(Map index_stats) { + this.index_stats = index_stats; } - public void setQrels_root(String qrels_root) { - this.qrels_root = qrels_root; - } + private String topic_root; + private String qrels_root; + private String topic_reader; - public String getIndex_root() { - return index_root; + public String getTopic_reader() { + return topic_reader; } - public void setIndex_root(String index_root) { - this.index_root = index_root; + public void setTopic_reader(String topic_reader) { + this.topic_reader = topic_reader; } - public String getRanking_root() { - return ranking_root; + public String getTopic_root() { + return topic_root; } - public void setRanking_root(String ranking_root) { - this.ranking_root = ranking_root; + public void setTopic_root(String topic_root) { + this.topic_root = topic_root; } - public String getCollection() { - return collection; + public String getQrels_root() { + return qrels_root; } - public void setCollection(String collection) { - this.collection = collection; + public void setQrels_root(String qrels_root) { + this.qrels_root = qrels_root; } - public String getGenerator() { - return generator; - } + private List metrics; + private List models; + private List topics; - public void setGenerator(String generator) { - this.generator = generator; + public List getMetrics() { + return metrics; } - public int getThreads() { - return threads; + public void setMetrics(List evals) { + this.metrics = evals; } - public void setThreads(int threads) { - this.threads = threads; + public List getTopics() { + return topics; } - public String getTopic_reader() { - return topic_reader; + public void setTopics(List topics) { + this.topics = topics; } - public void setTopic_reader(String topic_reader) { - this.topic_reader = topic_reader; - } - - public String getIndex_path() { - return index_path; + public List getModels() { + return models; } - public void setIndex_path(String index_path) { - this.index_path = index_path; + public void setModels(List models) { + this.models = models; } static class Topic { @@ -194,19 +177,20 @@ static class Topic { static class Model { private String name; private String display; - private List params; + private String params; private Map> results; public String getName() { return name; } public void setName(String name) { this.name = name; } public Map> getResults() { return results; } + public void setDisplay(String display) { this.display = display; } public String getDisplay() { return display; } public void setResults(Map> results) { this.results = results; } - public List getParams() { return params; } - public void setParams(List params) { this.params = params; } + public String getParams() { return params; } + public void setParams(String params) { this.params = params; } } - static class Eval { + static class Metric { private String command; private String params; private String separator; @@ -233,65 +217,31 @@ static class Eval { public void setMetric_precision(int metric_precision) { this.metric_precision = metric_precision; } } - public String getIndex_options() { - return index_options; - } - - public void setIndex_options(String index_options) { - this.index_options = index_options; - } - - public List getSearch_options() { - return search_options; - } - - public void setSearch_options(List search_options) { - this.search_options = search_options; - } - public String generateIndexingCommand(String collection) { - boolean containRawDocs = false; - if (getIndex_options().contains("-storeRaw")) { - containRawDocs = true; - } - StringBuilder builder = new StringBuilder(); - builder.append("nohup sh "); - builder.append(INDEX_COMMAND); - builder.append(" -collection ").append(getCollection()).append(" \\\n"); - builder.append(" -input ").append("/path/to/"+collection).append(" \\\n"); - builder.append(" -index ").append(getIndex_path()).append(" \\\n"); - builder.append(" -generator ").append(getGenerator()).append(" \\\n"); - builder.append(" -threads ").append(getThreads()); - builder.append(" ").append(getIndex_options()); - builder.append(" \\\n").append(String.format(" >& logs/log.%s &", collection)); + builder.append(INDEX_COMMAND).append(" \\\n"); + builder.append(" -collection ").append(getCollection()).append(" \\\n"); + builder.append(" -input ").append("/path/to/"+collection).append(" \\\n"); + builder.append(" -index ").append(getIndex_path()).append(" \\\n"); + builder.append(" -generator ").append(getGenerator()).append(" \\\n"); + builder.append(" -threads ").append(getThreads()); + builder.append(" ").append(getIndex_options()).append(" \\\n"); + builder.append(String.format(" >& logs/log.%s &", collection)); return builder.toString(); } public String generateRankingCommand(String collection) { - boolean containRawDocs = false; - if (getIndex_options().contains("-storeRaw")) { - containRawDocs = true; - } - StringBuilder builder = new StringBuilder(); for (Model model : getModels()) { for (Topic topic : getTopics()) { - builder.append("nohup "); - builder.append(getSearch_command()); - builder.append(" ").append("-index").append(" ").append(getIndex_path()).append(" \\\n"); - builder.append(" ").append("-topicreader").append(" ").append(getTopic_reader()); - builder.append(" ").append("-topics").append(" ").append(Paths.get(getTopic_root(), topic.getPath()).toString()).append(" \\\n"); - builder.append(" ").append("-output").append(" ").append("runs/run."+collection+"."+model.getName()+"."+topic.getPath()).append(" \\\n"); - if (getSearch_options() != null) { - for (String option : getSearch_options()) { - builder.append(" ").append(option); - } - } + builder.append(SEARCH_COMMAND).append(" \\\n"); + builder.append(" -index").append(" ").append(getIndex_path()).append(" \\\n"); + builder + .append(" -topics").append(" ").append(Paths.get(getTopic_root(), topic.getPath()).toString()) + .append(" -topicreader").append(" ").append(getTopic_reader()).append(" \\\n"); + builder.append(" -output").append(" ").append("runs/run."+collection+"."+model.getName()+"."+topic.getPath()).append(" \\\n"); if (model.getParams() != null) { - for (String option : model.getParams()) { - builder.append(" ").append(option); - } + builder.append(" ").append(model.getParams()); } builder.append(" &"); // nohup builder.append("\n"); @@ -307,15 +257,15 @@ public String generateEvalCommand(String collection) { for (Model model : getModels()) { for (Topic topic : getTopics()) { Map>> combinedEvalCmd = new HashMap<>(); - for (Eval eval : getEvals()) { + for (Metric eval : getMetrics()) { String evalCmd = eval.getCommand(); String evalCmdOption = ""; if (eval.getParams() != null) { - evalCmdOption += " "+eval.getParams(); + evalCmdOption += " " + eval.getParams(); } String evalCmdResidual = ""; - evalCmdResidual += " "+Paths.get(getQrels_root(), topic.getQrel()); - evalCmdResidual += " runs/run."+collection+"."+model.getName()+"."+topic.getPath(); + evalCmdResidual += " " + Paths.get(getQrels_root(), topic.getQrel()); + evalCmdResidual += " runs/run." + collection+ "." + model.getName() + "." + topic.getPath(); evalCmdResidual += "\n"; if (eval.isCan_combine() || evalCmdOption.isEmpty()) { combinedEvalCmd.putIfAbsent(evalCmd, new HashMap<>()); @@ -339,11 +289,11 @@ public String generateEvalCommand(String collection) { public String generateEffectiveness(String collection) { StringBuilder builder = new StringBuilder(); - for (Eval eval : getEvals()) { - builder.append(String.format("%1$-40s|", eval.getMetric().toUpperCase())); + for (Metric eval : getMetrics()) { + builder.append(String.format("%1$-40s|", eval.getMetric())); for (Model model : getModels()) { if (model.getDisplay() == null) { - builder.append(String.format(" %1$-10s|", model.getName().toUpperCase())); + builder.append(String.format(" %1$-10s|", model.getName())); } else { builder.append(String.format(" %1$-10s|", model.getDisplay())); } diff --git a/src/test/java/io/anserini/doc/GenerateRegressionDocsTest.java b/src/test/java/io/anserini/doc/GenerateRegressionDocsTest.java index 792468f572..43428adb4b 100755 --- a/src/test/java/io/anserini/doc/GenerateRegressionDocsTest.java +++ b/src/test/java/io/anserini/doc/GenerateRegressionDocsTest.java @@ -19,7 +19,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.text.StrSubstitutor; +import org.apache.commons.text.StringSubstitutor; import org.junit.Test; import java.io.File; @@ -52,7 +52,7 @@ public void main() throws Exception { valuesMap.put("eval_cmds", data.generateEvalCommand(corpus)); valuesMap.put("effectiveness", data.generateEffectiveness(corpus)); - StrSubstitutor sub = new StrSubstitutor(valuesMap); + StringSubstitutor sub = new StringSubstitutor(valuesMap); URL template = GenerateRegressionDocsTest.class.getResource(String.format("/docgen/templates/%s.template", testName)); Scanner scanner = new Scanner(new File(template.toURI()), "UTF-8"); String text = scanner.useDelimiter("\\A").next(); diff --git a/src/test/java/io/anserini/doc/JDIQ2018EffectivenessDocsTest.java b/src/test/java/io/anserini/doc/JDIQ2018EffectivenessDocsTest.java index 8bb2e3434d..ae7747b0aa 100755 --- a/src/test/java/io/anserini/doc/JDIQ2018EffectivenessDocsTest.java +++ b/src/test/java/io/anserini/doc/JDIQ2018EffectivenessDocsTest.java @@ -20,7 +20,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.text.StrSubstitutor; +import org.apache.commons.text.StringSubstitutor; import org.junit.Test; import java.io.File; @@ -119,7 +119,8 @@ public void main() throws Exception { Model data = mapper.readValue(new File(yaml.toURI()), Model.class); Map valuesMap = new HashMap<>(); valuesMap.put("results", data.generateEffectiveness()); - StrSubstitutor sub = new StrSubstitutor(valuesMap); + + StringSubstitutor sub = new StringSubstitutor(valuesMap); URL template = GenerateRegressionDocsTest.class.getResource("/jdiq2018/doc.template"); Scanner scanner = new Scanner(new File(template.toURI()), "UTF-8"); String text = scanner.useDelimiter("\\A").next();