From 8194b8e91fe799fcb0d3b4348e9d710a0aa1ad26 Mon Sep 17 00:00:00 2001 From: Jasper Xian <41269031+jasper-xian@users.noreply.github.com> Date: Tue, 10 Oct 2023 17:25:46 -0400 Subject: [PATCH] Add AToMiC dense validation qrels + topics (#2219) * add atomic image & text topics with ViT-L-14 * add atomic validation qrels * update tools submodule --- src/main/java/io/anserini/eval/Qrels.java | 4 +++- src/main/java/io/anserini/search/topicreader/Topics.java | 4 ++++ .../java/io/anserini/search/topicreader/TopicReaderTest.java | 4 ++-- tools | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/java/io/anserini/eval/Qrels.java b/src/main/java/io/anserini/eval/Qrels.java index 6fe2b0f8cb..f5d291d1ae 100644 --- a/src/main/java/io/anserini/eval/Qrels.java +++ b/src/main/java/io/anserini/eval/Qrels.java @@ -161,7 +161,9 @@ public enum Qrels { MIRACL_V10_TH_DEV("qrels.miracl-v1.0-th-dev.tsv"), MIRACL_V10_ZH_DEV("qrels.miracl-v1.0-zh-dev.tsv"), MIRACL_V10_DE_DEV("qrels.miracl-v1.0-de-dev.tsv"), - MIRACL_V10_YO_DEV("qrels.miracl-v1.0-yo-dev.tsv"); + MIRACL_V10_YO_DEV("qrels.miracl-v1.0-yo-dev.tsv"), + ATOMIC_VAL_T2I("qrels.atomic.validation.t2i.trec"), + ATOMIC_VAL_I2T("qrels.atomic.validation.i2t.trec"); public final String path; diff --git a/src/main/java/io/anserini/search/topicreader/Topics.java b/src/main/java/io/anserini/search/topicreader/Topics.java index 65037fc067..f7f124e295 100755 --- a/src/main/java/io/anserini/search/topicreader/Topics.java +++ b/src/main/java/io/anserini/search/topicreader/Topics.java @@ -400,6 +400,10 @@ public enum Topics { MIRACL_V10_DE_DEV(TsvStringTopicReader.class, "topics.miracl-v1.0-de-dev.tsv"), MIRACL_V10_YO_DEV(TsvStringTopicReader.class, "topics.miracl-v1.0-yo-dev.tsv"), + // AToMiC topics + ATOMIC_V021_TEXT_VAL(JsonStringTopicReader.class, "topics.atomic.validation.text.ViT-L-14.laion2b_s32b_b82k.jsonl"), + ATOMIC_V021_IMAGE_VAL(JsonStringTopicReader.class, "topics.atomic.validation.image.ViT-L-14.laion2b_s32b_b82k.jsonl"), + // unused topics CACM(CacmTopicReader.class, "topics.cacm.txt"), NTCIR_EN_1(NtcirTopicReader.class, "topics.www1.english.txt"), diff --git a/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java b/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java index 73a778ee81..c0cf2b92f9 100755 --- a/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java +++ b/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java @@ -32,13 +32,13 @@ public class TopicReaderTest { public void testIterateThroughAllEnums() { int cnt = 0; for (Topics topic : Topics.values()) { - cnt++; + cnt++; // Verify that we can fetch the TopicReader class given the name of the topic file. String path = topic.path; assertEquals(topic.readerClass, TopicReader.getTopicReaderClassByFile(path)); } - assertEquals(359, cnt); + assertEquals(361, cnt); } @Test diff --git a/tools b/tools index 95d06f6004..bb46b80d9b 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 95d06f60043837a309331ffdbee7560dd1676313 +Subproject commit bb46b80d9bd0b0ae71878211539c7992a0ece911