From 5e8743f27ea09c312bccdf1c3524efcdba54e617 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Mon, 14 Jun 2021 21:49:19 -0400 Subject: [PATCH] Updated regression log, fixed unit tests (#1562) + Updated regression docs to record overhaul of regressions for MS MARCO {passage, doc} and DL {19, 20}. + Turned back on assert in e2e unit test that was previously commented out accidentally. --- docs/regressions-log.md | 10 ++++++++++ docs/regressions.md | 4 ++-- .../java/io/anserini/collection/JsonCollection.java | 2 ++ .../java/io/anserini/integration/EndToEndTest.java | 3 +-- .../anserini/integration/JsonVectorEndToEndTest.java | 8 ++++---- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/regressions-log.md b/docs/regressions-log.md index 63d33d6a48..4d884d373b 100644 --- a/docs/regressions-log.md +++ b/docs/regressions-log.md @@ -3,6 +3,16 @@ The following change log details commits to regression tests that alter effectiveness and the addition of new regression tests. This documentation is useful for figuring why results may have changed over time. 
+### June 14, 2021 + ++ commit [`b58c85`](https://github.com/castorini/anserini/commit/b58c8559b4fc473e857b9ce5ca73523d8d017b41) (06/14/2021) + +Overhauled regressions for MS MARCO {passage, doc} and DL {19, 20}: ++ MS MARCO passage + {doc2query, docTTTTTquery} ++ MS MARCO doc {per-doc, per-passage} x {doc2query, docTTTTTquery} ++ {DL19, DL20} passage + {doc2query, docTTTTTquery} ++ {DL19, DL20} doc {per-doc, per-passage} x {doc2query, docTTTTTquery} + ### April 13, 2021 + commit [`868afe`](https://github.com/castorini/anserini/commit/868afe9ec07fa477ce817d7a43dd5723cb4c8a86) (04/13/2021) diff --git a/docs/regressions.md b/docs/regressions.md index 67812b2657..f7859a1f6c 100644 --- a/docs/regressions.md +++ b/docs/regressions.md @@ -69,7 +69,7 @@ nohup python src/main/python/run_regression.py --collection dl19-doc-docTTTTTque nohup python src/main/python/run_regression.py --collection dl19-doc-docTTTTTquery-per-passage >& logs/log.dl19-doc-docTTTTTquery-per-passage & nohup python src/main/python/run_regression.py --collection dl20-passage >& logs/log.dl20-passage & -nohup python src/main/python/run_regression.py --collection dl20-passage-docTTTTTquery >& logs/dl20-passage-docTTTTTquery & +nohup python src/main/python/run_regression.py --collection dl20-passage-docTTTTTquery >& logs/log.dl20-passage-docTTTTTquery & nohup python src/main/python/run_regression.py --collection dl20-doc >& logs/log.dl20-doc & nohup python src/main/python/run_regression.py --collection dl20-doc-per-passage >& logs/log.dl20-doc-per-passage & nohup python src/main/python/run_regression.py --collection dl20-doc-docTTTTTquery-per-doc >& logs/log.dl20-doc-docTTTTTquery-per-doc & @@ -128,7 +128,7 @@ nohup python src/main/python/run_regression.py --index --collection dl19-doc-doc nohup python src/main/python/run_regression.py --index --collection dl19-doc-docTTTTTquery-per-passage >& logs/log.dl19-doc-docTTTTTquery-per-passage & nohup python src/main/python/run_regression.py --index 
--collection dl20-passage >& logs/log.dl20-passage & -nohup python src/main/python/run_regression.py --index --collection dl20-passage-docTTTTTquery >& logs/dl20-passage-docTTTTTquery & +nohup python src/main/python/run_regression.py --index --collection dl20-passage-docTTTTTquery >& logs/log.dl20-passage-docTTTTTquery & nohup python src/main/python/run_regression.py --index --collection dl20-doc >& logs/log.dl20-doc & nohup python src/main/python/run_regression.py --index --collection dl20-doc-per-passage >& logs/log.dl20-doc-per-passage & nohup python src/main/python/run_regression.py --index --collection dl20-doc-docTTTTTquery-per-doc >& logs/log.dl20-doc-docTTTTTquery-per-doc & diff --git a/src/main/java/io/anserini/collection/JsonCollection.java b/src/main/java/io/anserini/collection/JsonCollection.java index e8e3625df8..3c4776fb3e 100644 --- a/src/main/java/io/anserini/collection/JsonCollection.java +++ b/src/main/java/io/anserini/collection/JsonCollection.java @@ -73,6 +73,7 @@ public JsonCollection(Path path){ this.allowedFileSuffix = new HashSet<>(Arrays.asList(".json", ".jsonl")); } + @SuppressWarnings("unchecked") @Override public FileSegment createFileSegment(Path p) throws IOException { return new Segment(p); @@ -99,6 +100,7 @@ public Segment(Path path) throws IOException { } } + @SuppressWarnings("unchecked") @Override public void readNext() throws NoSuchElementException { if (node == null) { diff --git a/src/test/java/io/anserini/integration/EndToEndTest.java b/src/test/java/io/anserini/integration/EndToEndTest.java index 4ce5d037a2..42cf13e154 100644 --- a/src/test/java/io/anserini/integration/EndToEndTest.java +++ b/src/test/java/io/anserini/integration/EndToEndTest.java @@ -312,8 +312,7 @@ protected void checkRankingResults(String key, String output) throws IOException int cnt = 0; String s; while ((s = br.readLine()) != null) { - //assertEquals(ref[cnt], s); - System.out.println(s); + assertEquals(ref[cnt], s); cnt++; } diff --git 
a/src/test/java/io/anserini/integration/JsonVectorEndToEndTest.java b/src/test/java/io/anserini/integration/JsonVectorEndToEndTest.java index ef3565911a..bc2581f7ef 100644 --- a/src/test/java/io/anserini/integration/JsonVectorEndToEndTest.java +++ b/src/test/java/io/anserini/integration/JsonVectorEndToEndTest.java @@ -68,10 +68,10 @@ protected void setSearchGroundTruth() { queryTokens.get("3").add("f4"); referenceRunOutput.put("impact", new String[]{ - "1 Q0 doc2 1 0.613600 Anserini", - "2 Q0 doc1 1 0.393100 Anserini", - "3 Q0 doc1 1 0.153100 Anserini", - "3 Q0 doc2 2 0.135500 Anserini"}); + "1 Q0 doc2 1 8.000000 Anserini", + "2 Q0 doc1 1 1.000000 Anserini", + "3 Q0 doc1 1 4.000000 Anserini", + "3 Q0 doc2 2 3.000000 Anserini"}); } }