Skip to content
Snippets Groups Projects
Commit 07a48c9b authored by tgupta6
Browse files

parsing script for vqa test and testdev

parent c9c1b754
No related branches found
No related tags found
No related merge requests found
......@@ -55,6 +55,19 @@ def parse_annotations(input_json, output_json):
ujson.dump(parsed_anno, file, indent=4, sort_keys=True)
def parse_test_annotations(questions_json, output_json):
    """Index the entries of a questions file by question id and save them.

    Loads ``questions_json``, walks its top-level ``'questions'`` list and
    stores every entry, unchanged, under that entry's ``'question_id'``.
    The resulting dict is written to ``output_json`` with 4-space
    indentation and sorted keys.
    """
    print('Reading json file: {}'.format(questions_json))
    with open(questions_json, 'r') as in_file:
        loaded = ujson.load(in_file)

    # If a question id occurs more than once, the last entry wins.
    anno_by_qid = {
        entry['question_id']: entry
        for entry in loaded['questions']
    }

    print('Writing constructed dict to file: {}'.format(output_json))
    with open(output_json, 'w') as out_file:
        ujson.dump(anno_by_qid, out_file, indent=4, sort_keys=True)
def write_json_with_parsed_questions(
input_json,
questions_txt_filename,
......@@ -111,6 +124,19 @@ def add_mcq_options(anno_json, questions_json, out_json):
with open(out_json, 'w') as file:
ujson.dump(anno, file, indent=4, sort_keys=True)
def add_mcq_answer_to_test(anno_json):
    """Add a placeholder ``multiple_choice_answer`` to every annotation.

    Loads the dict stored in ``anno_json``, sets each value's
    ``'multiple_choice_answer'`` to the first element of that value's
    ``'multiple_choices'``, and writes the result back to the same path
    (4-space indentation, sorted keys). The input file is overwritten.
    """
    print('Reading json file: {}'.format(anno_json))
    with open(anno_json, 'r') as in_file:
        annotations = ujson.load(in_file)

    print('Adding multiple_choice_answer (dummy) ...')
    # Only the records are modified; their keys are not needed here.
    for record in annotations.values():
        first_choice = record['multiple_choices'][0]
        record['multiple_choice_answer'] = first_choice

    print('Writing json file: {}'.format(anno_json))
    with open(anno_json, 'w') as out_file:
        ujson.dump(annotations, out_file, indent=4, sort_keys=True)
def add_noun_adjective_labels(
anno_json,
......@@ -242,6 +268,18 @@ def list_of_val_question_ids(
with open(val_qids_json,'w') as file:
ujson.dump(qids, file)
def list_of_test_question_ids(
        json_anno,
        test_qids_json):
    """Write the top-level keys of ``json_anno`` to ``test_qids_json``.

    Loads the dict stored in ``json_anno`` and dumps the list of its keys
    as JSON to ``test_qids_json``.
    """
    with open(json_anno, 'r') as in_file:
        annotations = ujson.load(in_file)

    # Iterating a dict yields its keys, so this is the list of keys.
    question_ids = list(annotations)

    with open(test_qids_json, 'w') as out_file:
        ujson.dump(question_ids, out_file)
def counts_of_question_objects_and_attributes(
json_anno,
......@@ -318,34 +356,35 @@ def check_clash(vqa_hash, genome_hash):
if __name__=='__main__':
datadir = '/home/ssd/VQA/'
mode = 'val'
mode = 'test-dev'
year = '2015'
questions_json_filename = os.path.join(
datadir,
'MultipleChoice_mscoco_' + mode + '2014_questions.json')
'MultipleChoice_mscoco_' + mode + year + '_questions.json')
questions_txt_filename = os.path.join(
datadir,
'MultipleChoice_mscoco_' + mode + '2014_questions_dump.txt')
'MultipleChoice_mscoco_' + mode + year + '_questions_dump.txt')
question_ids_txt_filename = os.path.join(
datadir,
'MultipleChoice_mscoco_' + mode + '2014_question_ids_dump.txt')
'MultipleChoice_mscoco_' + mode + year + '_question_ids_dump.txt')
questions_parsed_txt_filename = os.path.join(
datadir,
'MultipleChoice_mscoco_' + mode + '2014_questions_parsed.txt')
'MultipleChoice_mscoco_' + mode + year + '_questions_parsed.txt')
annotations_json_filename = os.path.join(
datadir,
'mscoco_' + mode + '2014_annotations.json')
'mscoco_' + mode + year + '_annotations.json')
annotations_parsed_json_filename = os.path.join(
datadir,
'mscoco_' + mode + '2014_annotations_parsed.json')
'mscoco_' + mode + year + '_annotations_parsed.json')
annotations_with_parsed_questions_filename = os.path.join(
datadir,
'mscoco_' + mode + '2014_annotations_with_parsed_questions.json')
'mscoco_' + mode + year + '_annotations_with_parsed_questions.json')
answer_vocab_filename = os.path.join(
datadir,
......@@ -353,43 +392,51 @@ if __name__=='__main__':
nouns_json_filename = os.path.join(
datadir,
'mscoco_' + mode + '2014_question_nouns.json')
'mscoco_' + mode + year + '_question_nouns.json')
adjectives_json_filename = os.path.join(
datadir,
'mscoco_' + mode + '2014_question_adjectives.json')
'mscoco_' + mode + year + '_question_adjectives.json')
ans_vocab_size = 5000
# dump_questions_to_txt(
# questions_json_filename,
# questions_txt_filename,
# question_ids_txt_filename)
dump_questions_to_txt(
questions_json_filename,
questions_txt_filename,
question_ids_txt_filename)
# parse_questions(
# questions_txt_filename,
# questions_parsed_txt_filename)
# parse_annotations(
# annotations_json_filename,
# annotations_parsed_json_filename)
# write_json_with_parsed_questions(
# annotations_parsed_json_filename,
# questions_parsed_txt_filename,
# question_ids_txt_filename,
# annotations_with_parsed_questions_filename)
parse_questions(
questions_txt_filename,
questions_parsed_txt_filename)
if 'test' in mode:
parse_test_annotations(
questions_json_filename,
annotations_parsed_json_filename)
else:
parse_annotations(
annotations_json_filename,
annotations_parsed_json_filename)
write_json_with_parsed_questions(
annotations_parsed_json_filename,
questions_parsed_txt_filename,
question_ids_txt_filename,
annotations_with_parsed_questions_filename)
# add_mcq_options(
# annotations_with_parsed_questions_filename,
# questions_json_filename,
# annotations_with_parsed_questions_filename)
# add_noun_adjective_labels (
# annotations_with_parsed_questions_filename,
# questions_txt_filename,
# question_ids_txt_filename,
# annotations_with_parsed_questions_filename)
if 'test' in mode:
add_mcq_answer_to_test(annotations_with_parsed_questions_filename)
else:
add_mcq_options(
annotations_with_parsed_questions_filename,
questions_json_filename,
annotations_with_parsed_questions_filename)
add_noun_adjective_labels (
annotations_with_parsed_questions_filename,
questions_txt_filename,
question_ids_txt_filename,
annotations_with_parsed_questions_filename)
# if mode=='train':
# create_ans_vocab(
......@@ -419,6 +466,15 @@ if __name__=='__main__':
# list_of_val_question_ids(
# annotations_with_parsed_questions_filename,
# val_qids_json)
if 'test' in mode:
test_qids_json = os.path.join(
datadir,
mode + '_qids.json')
list_of_test_question_ids(
annotations_with_parsed_questions_filename,
test_qids_json)
# counts_of_question_objects_and_attributes(
# annotations_with_parsed_questions_filename,
......@@ -428,6 +484,6 @@ if __name__=='__main__':
# generate_md5hash(datadir)
check_clash(
'/home/ssd/VQA/md5_hash_val2015.json',
'/home/ssd/VisualGenome/md5_hash.json')
# check_clash(
# '/home/ssd/VQA/md5_hash_val2015.json',
# '/home/ssd/VisualGenome/md5_hash.json')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment