From dfbbba6bd5844331eb306b38e4e3e262631b8c42 Mon Sep 17 00:00:00 2001 From: tgupta6 <tgupta6@illinois.edu> Date: Sat, 11 Mar 2017 11:53:14 -0600 Subject: [PATCH] merging scripts and modifications for train + val training --- answer_classifier_cached_features/train.py | 6 +-- constants_crunchy.py | 12 +++++- ...n_val_annotations_with_parsed_questions.py | 22 ++++++++++ data/merge_train_val_resnet_features.py | 40 +++++++++++++++++++ data/merge_vqa_train_val_qids.py | 19 +++++++++ data/vqa_cached_features.py | 7 ++-- 6 files changed, 99 insertions(+), 7 deletions(-) create mode 100644 data/merge_train_val_annotations_with_parsed_questions.py create mode 100644 data/merge_train_val_resnet_features.py create mode 100644 data/merge_vqa_train_val_qids.py diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py index 4fcfe27..5ea1847 100644 --- a/answer_classifier_cached_features/train.py +++ b/answer_classifier_cached_features/train.py @@ -667,9 +667,9 @@ def create_scratch_initializer(graph, sess): def create_vqa_batch_generator(): data_mgr = vqa_data.data( - constants.vqa_train_resnet_feat_dir, - constants.vqa_train_anno, - constants.vqa_train_subset_qids, + constants.vqa_train_val_resnet_feat_dir, + constants.vqa_train_val_anno, + constants.vqa_train_val_qids, constants.vocab_json, constants.vqa_answer_vocab_json, constants.object_labels_json, diff --git a/constants_crunchy.py b/constants_crunchy.py index add67d3..160c2d1 100644 --- a/constants_crunchy.py +++ b/constants_crunchy.py @@ -5,7 +5,7 @@ def mkdir_if_not_exists(dir_name): if not os.path.exists(dir_name): os.mkdir(dir_name) -experiment_name = 'trial_new_rel_feat' +experiment_name = 'resnet_50_model_train_val' ########################################################################## # Machine Specific Paths # @@ -210,6 +210,16 @@ vqa_val_qids = os.path.join( vqa_basedir, 'val_qids.json') +vqa_train_val_resnet_feat_dir = os.path.join( + vqa_basedir, + 'train_val2014_cropped_large_resnet_features') +vqa_train_val_qids = os.path.join( + vqa_basedir, + 'train_val_qids.json') +vqa_train_val_anno = os.path.join( + vqa_basedir, + 'train_val_anno.json') + vqa_test_resnet_feat_dir = os.path.join( vqa_basedir, 'test2015_cropped_large_resnet_features') diff --git a/data/merge_train_val_annotations_with_parsed_questions.py b/data/merge_train_val_annotations_with_parsed_questions.py new file mode 100644 index 0000000..4493c85 --- /dev/null +++ b/data/merge_train_val_annotations_with_parsed_questions.py @@ -0,0 +1,22 @@ +import constants +import ujson +import pdb + +def main(): + with open(constants.vqa_train_anno,'r') as file: + vqa_train_anno = ujson.load(file) + print(len(vqa_train_anno)) + + with open(constants.vqa_val_anno,'r') as file: + vqa_val_anno = ujson.load(file) + print(len(vqa_val_anno)) + + vqa_train_val_anno = vqa_train_anno + vqa_train_val_anno.update(vqa_val_anno) + print(len(vqa_train_val_anno)) + + with open(constants.vqa_train_val_anno,'w') as file: + ujson.dump(vqa_train_val_anno,file) + +if __name__=='__main__': + main() diff --git a/data/merge_train_val_resnet_features.py b/data/merge_train_val_resnet_features.py new file mode 100644 index 0000000..2ed3a38 --- /dev/null +++ b/data/merge_train_val_resnet_features.py @@ -0,0 +1,40 @@ +import os +import glob +import pdb +BASE1='/home/ssd/VQA/train2014_cropped_large_resnet_features/' +BASE2='/home/ssd/VQA/val2014_cropped_large_resnet_features/' +TARGET='/home/ssd/VQA/train_val2014_cropped_large_resnet_features/' + + +resnet_feat_filenames = glob.glob(BASE1 + '*.npy') + +for filename in resnet_feat_filenames: + print(filename) + filename_new = os.path.basename(filename) + filename_new = filename_new.replace('train','train_val') + target_filename = os.path.join( + TARGET, + filename_new) + base_filename = filename + cmd = 'ln -s {} {}'.format( + base_filename, + target_filename) + os.system(cmd) + +print('-'*10) + +resnet_feat_filenames = glob.glob(BASE2 + '*.npy') + +for filename in resnet_feat_filenames: + print(filename) + filename_new = os.path.basename(filename) + filename_new = filename_new.replace('val','train_val') + target_filename = os.path.join( + TARGET, + filename_new) + base_filename = filename + cmd = 'ln -s {} {}'.format( + base_filename, + target_filename) + os.system(cmd) + diff --git a/data/merge_vqa_train_val_qids.py b/data/merge_vqa_train_val_qids.py new file mode 100644 index 0000000..d24d3c0 --- /dev/null +++ b/data/merge_vqa_train_val_qids.py @@ -0,0 +1,19 @@ +import constants +import ujson +import pdb + +def main(): + with open(constants.vqa_train_subset_qids,'r') as file: + vqa_train_subset_qids = ujson.load(file) + + with open(constants.vqa_val_qids,'r') as file: + vqa_val_qids = ujson.load(file) + + vqa_train_val_qids = vqa_train_subset_qids + vqa_val_qids + + with open(constants.vqa_train_val_qids,'w') as file: + ujson.dump(vqa_train_val_qids,file) + + +if __name__=='__main__': + main() diff --git a/data/vqa_cached_features.py b/data/vqa_cached_features.py index e0e1153..16125e1 100644 --- a/data/vqa_cached_features.py +++ b/data/vqa_cached_features.py @@ -245,9 +245,10 @@ class data(): def get_region_feats(self, sample): question_id = self.sample_to_question_dict[sample] image_id = self.anno[str(question_id)]['image_id'] - data_split = re.split( - '_', - os.path.split(self.feat_dir)[1])[0] + # data_split = re.split( + # '_', + # os.path.split(self.feat_dir)[1])[0] + data_split = 'train_val2014' feat_path = os.path.join( self.feat_dir, -- GitLab