From dfbbba6bd5844331eb306b38e4e3e262631b8c42 Mon Sep 17 00:00:00 2001
From: tgupta6 <tgupta6@illinois.edu>
Date: Sat, 11 Mar 2017 11:53:14 -0600
Subject: [PATCH] merging scripts and modifications for train + val training

---
 answer_classifier_cached_features/train.py    |  6 +--
 constants_crunchy.py                          | 12 +++++-
 ...n_val_annotations_with_parsed_questions.py | 22 ++++++++++
 data/merge_train_val_resnet_features.py       | 40 +++++++++++++++++++
 data/merge_vqa_train_val_qids.py              | 19 +++++++++
 data/vqa_cached_features.py                   |  7 ++--
 6 files changed, 99 insertions(+), 7 deletions(-)
 create mode 100644 data/merge_train_val_annotations_with_parsed_questions.py
 create mode 100644 data/merge_train_val_resnet_features.py
 create mode 100644 data/merge_vqa_train_val_qids.py

diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py
index 4fcfe27..5ea1847 100644
--- a/answer_classifier_cached_features/train.py
+++ b/answer_classifier_cached_features/train.py
@@ -667,9 +667,9 @@ def create_scratch_initializer(graph, sess):
 
 def create_vqa_batch_generator():
     data_mgr = vqa_data.data(
-        constants.vqa_train_resnet_feat_dir,
-        constants.vqa_train_anno,
-        constants.vqa_train_subset_qids,
+        constants.vqa_train_val_resnet_feat_dir,
+        constants.vqa_train_val_anno,
+        constants.vqa_train_val_qids,
         constants.vocab_json,
         constants.vqa_answer_vocab_json,
         constants.object_labels_json,
diff --git a/constants_crunchy.py b/constants_crunchy.py
index add67d3..160c2d1 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -5,7 +5,7 @@ def mkdir_if_not_exists(dir_name):
     if not os.path.exists(dir_name):
         os.mkdir(dir_name)
         
-experiment_name = 'trial_new_rel_feat'
+experiment_name = 'resnet_50_model_train_val'
 
 ##########################################################################
 #                    Machine Specific Paths                              #
@@ -210,6 +210,16 @@ vqa_val_qids = os.path.join(
     vqa_basedir,
     'val_qids.json')
 
+# Merged train+val VQA split.  The qids and anno JSON files are written by
+# data/merge_vqa_train_val_qids.py and
+# data/merge_train_val_annotations_with_parsed_questions.py respectively.
+# Directory of train+val ResNet feature files (presumably the TARGET that
+# data/merge_train_val_resnet_features.py fills with symlinks -- confirm).
+vqa_train_val_resnet_feat_dir = os.path.join(
+    vqa_basedir, 'train_val2014_cropped_large_resnet_features')
+vqa_train_val_qids = os.path.join(vqa_basedir, 'train_val_qids.json')
+vqa_train_val_anno = os.path.join(vqa_basedir, 'train_val_anno.json')
+
 vqa_test_resnet_feat_dir = os.path.join(
     vqa_basedir,
     'test2015_cropped_large_resnet_features')
diff --git a/data/merge_train_val_annotations_with_parsed_questions.py b/data/merge_train_val_annotations_with_parsed_questions.py
new file mode 100644
index 0000000..4493c85
--- /dev/null
+++ b/data/merge_train_val_annotations_with_parsed_questions.py
@@ -0,0 +1,22 @@
+import constants
+import ujson
+import pdb
+
+def main():
+    """Merge the VQA train and val annotation dicts into one JSON file.
+
+    Prints train, val and merged sizes; val entries win on key collisions.
+    """
+    with open(constants.vqa_train_anno, 'r') as train_file:
+        merged_anno = ujson.load(train_file)
+    print(len(merged_anno))
+    with open(constants.vqa_val_anno, 'r') as val_file:
+        val_anno = ujson.load(val_file)
+    print(len(val_anno))
+    merged_anno.update(val_anno)
+    print(len(merged_anno))
+    with open(constants.vqa_train_val_anno, 'w') as out_file:
+        ujson.dump(merged_anno, out_file)
+
+if __name__=='__main__':
+    main()
diff --git a/data/merge_train_val_resnet_features.py b/data/merge_train_val_resnet_features.py
new file mode 100644
index 0000000..2ed3a38
--- /dev/null
+++ b/data/merge_train_val_resnet_features.py
@@ -0,0 +1,40 @@
+"""Symlink train2014 and val2014 ResNet feature files into one directory.
+
+Each link is named after its source file with the split token rewritten
+('train' or 'val' -> 'train_val'); the .npy files are not copied.
+"""
+import errno
+import glob
+import os
+import pdb
+
+BASE1='/home/ssd/VQA/train2014_cropped_large_resnet_features/'
+BASE2='/home/ssd/VQA/val2014_cropped_large_resnet_features/'
+TARGET='/home/ssd/VQA/train_val2014_cropped_large_resnet_features/'
+
+
+def link_features(source_dir, split, target_dir=TARGET):
+    """Link every .npy file in source_dir into target_dir.
+
+    os.symlink replaces the former shell 'ln -s' call, so paths containing
+    spaces or shell metacharacters are handled and real errors are raised
+    instead of being ignored. Links that already exist are skipped so the
+    script can be re-run.
+    """
+    filenames = glob.glob(source_dir + '*.npy')
+    if filenames and not os.path.isdir(target_dir):
+        os.makedirs(target_dir)
+    for filename in filenames:
+        print(filename)
+        link_name = os.path.basename(filename).replace(split, 'train_val')
+        try:
+            os.symlink(filename, os.path.join(target_dir, link_name))
+        except OSError as err:
+            if err.errno != errno.EEXIST:
+                raise
+
+
+link_features(BASE1, 'train')
+print('-'*10)
+link_features(BASE2, 'val')
+
diff --git a/data/merge_vqa_train_val_qids.py b/data/merge_vqa_train_val_qids.py
new file mode 100644
index 0000000..d24d3c0
--- /dev/null
+++ b/data/merge_vqa_train_val_qids.py
@@ -0,0 +1,19 @@
+import constants
+import ujson
+import pdb
+
+def main():
+    """Concatenate the train-subset and val qids and dump them as JSON.
+
+    Assumes both files hold JSON arrays (joined with ``+``) -- TODO confirm.
+    """
+    with open(constants.vqa_train_subset_qids, 'r') as train_file:
+        merged_qids = ujson.load(train_file)
+    with open(constants.vqa_val_qids, 'r') as val_file:
+        merged_qids = merged_qids + ujson.load(val_file)
+    with open(constants.vqa_train_val_qids, 'w') as out_file:
+        ujson.dump(merged_qids, out_file)
+        
+    
+if __name__=='__main__':
+    main()
diff --git a/data/vqa_cached_features.py b/data/vqa_cached_features.py
index e0e1153..16125e1 100644
--- a/data/vqa_cached_features.py
+++ b/data/vqa_cached_features.py
@@ -245,9 +245,10 @@ class data():
     def get_region_feats(self, sample):
         question_id = self.sample_to_question_dict[sample]
         image_id = self.anno[str(question_id)]['image_id']
-        data_split = re.split(
-            '_',
-            os.path.split(self.feat_dir)[1])[0]
+        # data_split = re.split(
+        #     '_',
+        #     os.path.split(self.feat_dir)[1])[0]
+        data_split = 'train_val2014'
 
         feat_path = os.path.join(
             self.feat_dir,
-- 
GitLab