From a7a9875222a4217c9435937c359859503f93be50 Mon Sep 17 00:00:00 2001 From: Filipe de Avila Belbute Peres Date: Fri, 21 Jul 2017 18:50:42 -0400 Subject: [PATCH 1/3] Added CLEVR task --- parlai/tasks/clevr/__init__.py | 5 +++ parlai/tasks/clevr/agents.py | 66 ++++++++++++++++++++++++++++++++++ parlai/tasks/clevr/build.py | 33 +++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 parlai/tasks/clevr/__init__.py create mode 100644 parlai/tasks/clevr/agents.py create mode 100644 parlai/tasks/clevr/build.py diff --git a/parlai/tasks/clevr/__init__.py b/parlai/tasks/clevr/__init__.py new file mode 100644 index 00000000000..8eff276d72d --- /dev/null +++ b/parlai/tasks/clevr/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. \ No newline at end of file diff --git a/parlai/tasks/clevr/agents.py b/parlai/tasks/clevr/agents.py new file mode 100644 index 00000000000..a55fec6a41a --- /dev/null +++ b/parlai/tasks/clevr/agents.py @@ -0,0 +1,66 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. + +from parlai.core.dialog_teacher import DialogTeacher +from .build import build + +import json +import os + + +def _path(opt): + build(opt) + dt = opt['datatype'].split(':')[0] + + if dt == 'valid': + dt = 'val' + elif dt != 'train' and dt != 'test': + raise RuntimeError('Not valid datatype.') + + prefix = os.path.join(opt['datapath'], 'CLEVR', 'CLEVR_v1.0') + questions_path = os.path.join(prefix, 'questions', + 'CLEVR_' + dt + '_questions.json') + images_path = os.path.join(prefix, 'images', dt) + + return questions_path, images_path + + +class DefaultTeacher(DialogTeacher): + """ + This version of VisDial inherits from the core Dialog Teacher, which just + requires it to define an iterator over its data `setup_data` in order to + inherit basic metrics, a `act` function, and enables + Hogwild training with shared memory with no extra work. + """ + def __init__(self, opt, shared=None): + + self.datatype = opt['datatype'] + data_path, self.images_path = _path(opt) + opt['datafile'] = data_path + self.id = 'clevr' + + super().__init__(opt, shared) + + def setup_data(self, path): + print('loading: ' + path) + with open(path) as data_file: + clevr = json.load(data_file) + + image_file = None + for ques in clevr['questions']: + # episode done if first question or image changed + new_episode = ques['image_filename'] != image_file + + # only show image at beginning of episode + image_file = ques['image_filename'] + img_path = None + if new_episode: + img_path = os.path.join(self.images_path, image_file) + + question = ques['question'] + answer = [ques['answer']] if ques['split'] != 'test' else None + # TODO cands? + yield (question, answer, None, None, img_path), new_episode diff --git a/parlai/tasks/clevr/build.py b/parlai/tasks/clevr/build.py new file mode 100644 index 00000000000..1a80176f7e6 --- /dev/null +++ b/parlai/tasks/clevr/build.py @@ -0,0 +1,33 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. +# Download and build the data if it does not exist. + +import parlai.core.build_data as build_data +import os + +from parlai.tasks.vqa_v1.build import buildImage + + +def build(opt): + dpath = os.path.join(opt['datapath'], 'CLEVR') + version = 'v1.0' + + if not build_data.built(dpath, version_string=version): + print('[building data: ' + dpath + ']') + # An older version exists, so remove these outdated files. + if build_data.built(dpath): + build_data.remove_dir(dpath) + build_data.make_dir(dpath) + + # Download the data. + fname = 'CLEVR_v1.0.zip' + url = 'https://s3-us-west-1.amazonaws.com/clevr/' + + build_data.download(url + fname, dpath, fname) + build_data.untar(dpath, fname) + + # Mark the data as built. + build_data.mark_done(dpath, version_string=version) From 24b685c2c83a21b40329bc019b1cc771931b026f Mon Sep 17 00:00:00 2001 From: Filipe de Avila Belbute Peres Date: Mon, 24 Jul 2017 01:06:00 -0400 Subject: [PATCH 2/3] Fixed comment --- parlai/tasks/clevr/agents.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/parlai/tasks/clevr/agents.py b/parlai/tasks/clevr/agents.py index a55fec6a41a..77fa9595d23 100644 --- a/parlai/tasks/clevr/agents.py +++ b/parlai/tasks/clevr/agents.py @@ -29,12 +29,6 @@ def _path(opt): class DefaultTeacher(DialogTeacher): - """ - This version of VisDial inherits from the core Dialog Teacher, which just - requires it to define an iterator over its data `setup_data` in order to - inherit basic metrics, a `act` function, and enables - Hogwild training with shared memory with no extra work. - """ def __init__(self, opt, shared=None): self.datatype = opt['datatype'] From f19aaedf997ef9d483a812d355afafbd1b93036b Mon Sep 17 00:00:00 2001 From: Filipe de Avila Belbute Peres Date: Mon, 24 Jul 2017 01:07:17 -0400 Subject: [PATCH 3/3] Removed empty line --- parlai/tasks/clevr/agents.py | 1 - 1 file changed, 1 deletion(-) diff --git a/parlai/tasks/clevr/agents.py b/parlai/tasks/clevr/agents.py index 77fa9595d23..e00b073aac4 100644 --- a/parlai/tasks/clevr/agents.py +++ b/parlai/tasks/clevr/agents.py @@ -30,7 +30,6 @@ def _path(opt): class DefaultTeacher(DialogTeacher): def __init__(self, opt, shared=None): - self.datatype = opt['datatype'] data_path, self.images_path = _path(opt) opt['datafile'] = data_path