From 07a48c9b9d77022f89e17a4e118e15a25e59c58a Mon Sep 17 00:00:00 2001 From: tgupta6 <tgupta6@illinois.edu> Date: Mon, 24 Oct 2016 12:00:30 -0500 Subject: [PATCH] parsing script for vqa test and testdev --- vqa_parser.py | 136 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 96 insertions(+), 40 deletions(-) diff --git a/vqa_parser.py b/vqa_parser.py index 0bc478f..7ca03ea 100644 --- a/vqa_parser.py +++ b/vqa_parser.py @@ -55,6 +55,19 @@ def parse_annotations(input_json, output_json): ujson.dump(parsed_anno, file, indent=4, sort_keys=True) +def parse_test_annotations(questions_json, output_json): + print 'Reading json file: {}'.format(questions_json) + with open(questions_json, 'r') as file: + data = ujson.load(file) + + parsed_anno = dict() + for ques_data in data['questions']: + parsed_anno[ques_data['question_id']] = ques_data + + print 'Writing constructed dict to file: {}'.format(output_json) + with open(output_json, 'w') as file: + ujson.dump(parsed_anno, file, indent=4, sort_keys=True) + def write_json_with_parsed_questions( input_json, questions_txt_filename, @@ -111,6 +124,19 @@ def add_mcq_options(anno_json, questions_json, out_json): with open(out_json, 'w') as file: ujson.dump(anno, file, indent=4, sort_keys=True) +def add_mcq_answer_to_test(anno_json): + print 'Reading json file: {}'.format(anno_json) + with open(anno_json, 'r') as file: + anno = ujson.load(file) + + print 'Adding multiple_choice_answer (dummy) ...' + for key, val in anno.items(): + val['multiple_choice_answer'] = val['multiple_choices'][0] + + print 'Writing json file: {}'.format(anno_json) + with open(anno_json, 'w') as file: + ujson.dump(anno, file, indent=4, sort_keys=True) + def add_noun_adjective_labels( anno_json, @@ -242,6 +268,18 @@ def list_of_val_question_ids( with open(val_qids_json,'w') as file: ujson.dump(qids, file) +def list_of_test_question_ids( + json_anno, + test_qids_json): + + with open(json_anno,'r') as file: + anno_data = ujson.load(file) + + qids = anno_data.keys() + + with open(test_qids_json,'w') as file: + ujson.dump(qids, file) + def counts_of_question_objects_and_attributes( json_anno, @@ -318,34 +356,35 @@ def check_clash(vqa_hash, genome_hash): if __name__=='__main__': datadir = '/home/ssd/VQA/' - mode = 'val' + mode = 'test-dev' + year = '2015' questions_json_filename = os.path.join( datadir, - 'MultipleChoice_mscoco_' + mode + '2014_questions.json') + 'MultipleChoice_mscoco_' + mode + year + '_questions.json') questions_txt_filename = os.path.join( datadir, - 'MultipleChoice_mscoco_' + mode + '2014_questions_dump.txt') + 'MultipleChoice_mscoco_' + mode + year + '_questions_dump.txt') question_ids_txt_filename = os.path.join( datadir, - 'MultipleChoice_mscoco_' + mode + '2014_question_ids_dump.txt') + 'MultipleChoice_mscoco_' + mode + year + '_question_ids_dump.txt') questions_parsed_txt_filename = os.path.join( datadir, - 'MultipleChoice_mscoco_' + mode + '2014_questions_parsed.txt') + 'MultipleChoice_mscoco_' + mode + year + '_questions_parsed.txt') annotations_json_filename = os.path.join( datadir, - 'mscoco_' + mode + '2014_annotations.json') + 'mscoco_' + mode + year + '_annotations.json') annotations_parsed_json_filename = os.path.join( datadir, - 'mscoco_' + mode + '2014_annotations_parsed.json') + 'mscoco_' + mode + year + '_annotations_parsed.json') annotations_with_parsed_questions_filename = os.path.join( datadir, - 'mscoco_' + mode + '2014_annotations_with_parsed_questions.json') + 'mscoco_' + mode + year + '_annotations_with_parsed_questions.json') answer_vocab_filename = os.path.join( datadir, @@ -353,43 +392,51 @@ if __name__=='__main__': nouns_json_filename = os.path.join( datadir, - 'mscoco_' + mode + '2014_question_nouns.json') + 'mscoco_' + mode + year + '_question_nouns.json') adjectives_json_filename = os.path.join( datadir, - 'mscoco_' + mode + '2014_question_adjectives.json') + 'mscoco_' + mode + year + '_question_adjectives.json') ans_vocab_size = 5000 - # dump_questions_to_txt( - # questions_json_filename, - # questions_txt_filename, - # question_ids_txt_filename) + dump_questions_to_txt( + questions_json_filename, + questions_txt_filename, + question_ids_txt_filename) - # parse_questions( - # questions_txt_filename, - # questions_parsed_txt_filename) - - # parse_annotations( - # annotations_json_filename, - # annotations_parsed_json_filename) - - # write_json_with_parsed_questions( - # annotations_parsed_json_filename, - # questions_parsed_txt_filename, - # question_ids_txt_filename, - # annotations_with_parsed_questions_filename) + parse_questions( + questions_txt_filename, + questions_parsed_txt_filename) + + if 'test' in mode: + parse_test_annotations( + questions_json_filename, + annotations_parsed_json_filename) + else: + parse_annotations( + annotations_json_filename, + annotations_parsed_json_filename) + + write_json_with_parsed_questions( + annotations_parsed_json_filename, + questions_parsed_txt_filename, + question_ids_txt_filename, + annotations_with_parsed_questions_filename) - # add_mcq_options( - # annotations_with_parsed_questions_filename, - # questions_json_filename, - # annotations_with_parsed_questions_filename) - - # add_noun_adjective_labels ( - # annotations_with_parsed_questions_filename, - # questions_txt_filename, - # question_ids_txt_filename, - # annotations_with_parsed_questions_filename) + if 'test' in mode: + add_mcq_answer_to_test(annotations_with_parsed_questions_filename) + else: + add_mcq_options( + annotations_with_parsed_questions_filename, + questions_json_filename, + annotations_with_parsed_questions_filename) + + add_noun_adjective_labels ( + annotations_with_parsed_questions_filename, + questions_txt_filename, + question_ids_txt_filename, + annotations_with_parsed_questions_filename) # if mode=='train': # create_ans_vocab( @@ -419,6 +466,15 @@ if __name__=='__main__': # list_of_val_question_ids( # annotations_with_parsed_questions_filename, # val_qids_json) + + if 'test' in mode: + test_qids_json = os.path.join( + datadir, + mode + '_qids.json') + + list_of_test_question_ids( + annotations_with_parsed_questions_filename, + test_qids_json) # counts_of_question_objects_and_attributes( # annotations_with_parsed_questions_filename, @@ -428,6 +484,6 @@ if __name__=='__main__': # generate_md5hash(datadir) - check_clash( - '/home/ssd/VQA/md5_hash_val2015.json', - '/home/ssd/VisualGenome/md5_hash.json') + # check_clash( + # '/home/ssd/VQA/md5_hash_val2015.json', + # '/home/ssd/VisualGenome/md5_hash.json') -- GitLab