diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py
index 8f383c6cc6155ef6001094aceba52d309ad0590b..03b406832208af20ccca9425a4f612c44906730f 100644
--- a/answer_classifier_cached_features/train.py
+++ b/answer_classifier_cached_features/train.py
@@ -3,6 +3,7 @@ import object_attribute_classifier_cached_features.inference as feature_graph
 import region_relevance_network.inference as relevance_graph
 import answer_classifier_cached_features.inference as answer_graph
 from tftools import var_collect, placeholder_management
+import tftools.train as multi_rate_train
 import tftools.data
 import losses
 import constants
@@ -529,7 +530,9 @@ class attach_optimizer():
         self.lr = lr
         with graph.tf_graph.as_default():
             all_trainable_vars = tf.trainable_variables()
-            self.not_to_train = []#+ graph.object_attribute_vars
+
+            self.not_to_train = graph.object_attribute_vars + graph.word_vec_vars
+
             vars_to_train = [
                 var for var in all_trainable_vars
                 if var not in self.not_to_train]
@@ -541,19 +544,35 @@ class attach_optimizer():
 
             all_vars = tf.all_variables()
             self.ops = dict()
+            
+            self.optimizer = multi_rate_train.MultiRateOptimizer(
+                tf.train.AdamOptimizer)
 
-            self.add_adam_optimizer(
-                graph.total_loss,
+            self.optimizer.add_variables(
+                self.graph.object_attribute_vars + self.graph.word_vec_vars,
+                learning_rate = 0.1*self.lr)
+
+            
+            self.optimizer.add_variables(
                 vars_to_train,
-                'optimizer')
+                learning_rate = self.lr)
 
-            self.train_op = self.group_all_train_ops()
+            self.train_op = self.optimizer.minimize(graph.total_loss)
+                
+            # self.add_adam_optimizer(
+            #     graph.total_loss,
+            #     vars_to_train,
+            #     'optimizer')
+
+            # self.train_op = self.group_all_train_ops()
+            
             all_vars_with_opt_vars = tf.all_variables()
             self.opt_vars = [var for var in all_vars_with_opt_vars if var not in all_vars]
 
     def filter_out_vars_to_train(self, var_list):
         return [var for var in var_list if var not in self.not_to_train]
 
+    
     def add_adam_optimizer(self, loss, var_list, name):
         var_list = self.filter_out_vars_to_train(var_list)
         if not var_list:
diff --git a/constants_crunchy.py b/constants_crunchy.py
index 7391777dbf87ba0c712520edd97ac001f1ee51b3..5046e0562dd18086514ec64fec623db221c37ec6 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -203,7 +203,7 @@ answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter)
 
 # Answer eval params
 answer_eval_on = 'val'
-answer_model_to_eval = answer_model + '-45000'
+answer_model_to_eval = answer_model + '-18500'
 
 vqa_results_dir = os.path.join(
     answer_output_dir,
diff --git a/constants_vision_gpu_1.py b/constants_vision_gpu_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..99378eb64e9666cef73dcd0eaf6d3d338f24b499
--- /dev/null
+++ b/constants_vision_gpu_1.py
@@ -0,0 +1,219 @@
+import os
+import pdb
+
+def mkdir_if_not_exists(dir_name):  # Create dir_name if it does not exist yet.
+    if not os.path.exists(dir_name):
+        os.makedirs(dir_name)  # also creates missing parents, where os.mkdir would raise
+        
+experiment_name = 'QA_explicit_dot_joint_training_pretrained_fix_pretrained'
+# Disabled alternative: experiment_name = 'object_attribute_classifier_large_images'
+# Global output directory; each experiment is saved in its own sub-directory here
+global_output_dir = '/data/tanmay/GenVQA_Exp_Results'
+
+global_experiment_dir = os.path.join(  # <global_output_dir>/<experiment_name>
+    global_output_dir,
+    experiment_name)
+
+tb_log_dir = os.path.join(  # presumably the TensorBoard log directory; only the path is built here
+    global_experiment_dir,
+    'tensorboard_logdir')
+
+mkdir_if_not_exists(global_output_dir)  # runs at import time; parents are created before children
+mkdir_if_not_exists(global_experiment_dir)
+mkdir_if_not_exists(tb_log_dir)
+
+# (height, width) to which images are resized before being fed into the networks
+image_size = (224, 224) 
+
+# Token to be used when the object or attribute is unknown
+unknown_token = 'UNK'
+
+# Visual Genome data paths (every path below is rooted at data_absolute_path)
+data_absolute_path = '/home/nfs/tgupta6/data/VisualGenome'
+
+image_dir = os.path.join(data_absolute_path, 'cropped_regions_large')
+genome_resnet_feat_dir = os.path.join(  # presumably ResNet features cached for image_dir -- TODO confirm
+    data_absolute_path,
+    'cropped_regions_large_resnet_features')
+
+object_labels_json = os.path.join(
+    data_absolute_path,
+    'restructured/object_labels.json')
+
+attribute_labels_json = os.path.join(
+    data_absolute_path,
+    'restructured/attribute_labels.json')
+
+regions_json = os.path.join(
+    data_absolute_path,
+    'restructured/region_with_labels.json')
+
+mean_image_filename = os.path.join(
+    data_absolute_path,
+    'restructured/mean_image.jpg')
+
+vocab_json = os.path.join(
+    data_absolute_path,
+    'restructured/vocab_subset.json')
+
+num_object_labels = 1000  # presumably the number of object classes -- TODO confirm
+num_attribute_labels = 1000  # presumably the number of attribute classes -- TODO confirm
+
+# Regions data partition
+# The first 80% is meant to be used for training
+# The next 10% is set aside for validation
+# The last 10% is to be used for testing
+num_total_regions = 1951768
+num_train_regions = 1561416 # First 80%
+num_val_regions = 195176 # Next 10%
+num_test_regions = num_total_regions \
+                   - num_train_regions \
+                   - num_val_regions  # remainder after train and val: 195176 regions
+
+# Path to the pretrained ResNet checkpoint
+resnet_ckpt = '/home/nfs/tgupta6/data/Resnet/' + \
+              'ResNet-L50.ckpt'
+
+# Path to the pretrained word vectors (word2vec binary)
+word2vec_binary = '/home/nfs/tgupta6/data/word_vectors/' + \
+                  'GoogleNews-vectors-negative300.bin'
+
+word_vector_size = 300  # agrees with the "300" in the word2vec file name above
+resnet_feat_dim = 2048  # presumably the length of one cached ResNet feature vector -- TODO confirm
+
+# Path to the numpy (.npy) matrix storing the vocabulary word vectors
+pretrained_vocab_word_vectors_npy = os.path.join(
+    data_absolute_path,
+    'restructured/pretrained_vocab_word_vectors.npy')
+
+# Object Attribute Classifier Training Params
+region_batch_size = 200
+region_num_samples = num_train_regions  # the whole training partition
+region_num_epochs = 4
+region_offset = 0  # presumably the index of the first region to use -- TODO confirm
+region_queue_size = 400
+region_regularization_coeff = 1e-4
+region_lr = 1e-3  # presumably the learning rate -- TODO confirm
+region_log_every_n_iter = 500
+region_output_dir = os.path.join(  # <global_experiment_dir>/object_attribute_classifiers
+    global_experiment_dir,
+    'object_attribute_classifiers')
+    
+mkdir_if_not_exists(region_output_dir)  # created at import time
+
+region_model = os.path.join(  # path prefix; '-<iteration>' is appended to it below
+    region_output_dir,
+    'model')                    
+
+# Object Attribute Finetuning Params
+region_fine_tune_from_iter = 3000
+region_fine_tune_from = region_model + '-' + str(region_fine_tune_from_iter)
+
+# Object Attribute Classifier Evaluation Params
+region_eval_on = 'val' # One of {'val','test','train'}
+region_model_to_eval = region_model + '-' + '77500'  # <region_model>-77500
+
+region_attribute_scores_dirname = os.path.join(
+    region_output_dir,
+    'attribute_scores')
+
+mkdir_if_not_exists(region_attribute_scores_dirname)  # created at import time
+
+# Answer prediction
+num_region_proposals = 100
+num_mcq_candidates = 18
+num_negative_answers = num_mcq_candidates - 1  # all candidates but one (presumably the correct answer -- TODO confirm)
+
+# VQA data paths (every path below is rooted at vqa_basedir)
+vqa_basedir = '/home/nfs/tgupta6/data/VQA/'
+
+vqa_train_image_dir = os.path.join(
+    vqa_basedir,
+    'train2014_cropped_large')
+vqa_train_resnet_feat_dir = os.path.join(  # presumably ResNet features cached for the train images -- TODO confirm
+    vqa_basedir,
+    'train2014_cropped_large_resnet_features')
+vqa_train_anno = os.path.join(
+    vqa_basedir,
+    'mscoco_train2014_annotations_with_parsed_questions.json')
+vqa_train_subset_qids = os.path.join(  # presumably question ids of the training subset -- TODO confirm
+    vqa_basedir,
+    'train_subset_qids.json')
+vqa_train_held_out_qids = os.path.join(  # presumably question ids held out of training -- TODO confirm
+    vqa_basedir,
+    'train_held_out_qids.json') 
+
+vqa_val_image_dir = os.path.join(
+    vqa_basedir,
+    'val2014_cropped_large')
+vqa_val_resnet_feat_dir = os.path.join(  # presumably ResNet features cached for the val images -- TODO confirm
+    vqa_basedir,
+    'val2014_cropped_large_resnet_features')
+vqa_val_anno = os.path.join(
+    vqa_basedir,
+    'mscoco_val2014_annotations_with_parsed_questions.json')
+vqa_val_qids = os.path.join(
+    vqa_basedir,
+    'val_qids.json')
+
+vqa_answer_vocab_json = os.path.join(
+    vqa_basedir,
+    'answer_vocab.json')
+
+# VQA dataset params (disabled: every assignment below is commented out)
+# num_train_questions = 248349
+# num_val_subset_questions = 10000
+# num_val_questions = 121512
+# num_val_rest_questions = num_val_questions - num_val_subset_questions
+# num_test_questions = 0
+
+# Answer classifier training params
+answer_batch_size = 50
+answer_num_epochs = 10
+answer_offset = 0  # presumably the index of the first sample to use -- TODO confirm
+answer_obj_atr_loss_wt = 0.1  # presumably the weight of the object/attribute loss term -- TODO confirm
+answer_regularization_coeff = 1e-5
+answer_queue_size = 500
+answer_embedding_dim = 600
+answer_lr = 1e-4  # presumably the base learning rate -- TODO confirm
+answer_log_every_n_iter = 500
+answer_output_dir = os.path.join(  # <global_experiment_dir>/answer_classifiers
+    global_experiment_dir,
+    'answer_classifiers')
+    
+mkdir_if_not_exists(answer_output_dir)  # created at import time
+
+pretrained_model = '/home/nfs/tgupta6/projects/GenVQA/Exp_Results/' +\
+                   'pretrained_object_attribute_classifier/' +\
+                   'obj_atr_model_77500'  # absolute path, independent of global_experiment_dir
+
+answer_model = os.path.join(  # path prefix; '-<iteration>' is appended to it below
+    answer_output_dir,
+    'model')
+
+# Additional joint-training params for the answer classifier
+num_regions_with_labels = 100
+
+# Answer fine-tuning params
+answer_fine_tune_from_iter = 22500
+answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter)
+
+# Answer eval params
+answer_eval_on = 'val'  # also embedded in the eval JSON file names below
+answer_model_to_eval = answer_model + '-69500'
+
+answer_eval_data_json = os.path.join(  # <answer_output_dir>/eval_<answer_eval_on>_data.json
+    answer_output_dir,
+    'eval_' + answer_eval_on + '_data.json')
+
+answer_eval_results_json = os.path.join(  # <answer_output_dir>/eval_<answer_eval_on>_results.json
+    answer_output_dir,
+    'eval_' + answer_eval_on + '_results.json')
+
+# Parameters for selecting the best model
+models_dir = answer_output_dir
+start_model = 1000  # presumably the first checkpoint iteration considered -- TODO confirm
+step_size = 2000  # presumably the iteration gap between considered checkpoints -- TODO confirm
+model_accuracies_txt = os.path.join(
+    answer_output_dir,
+    'model_accuracies.txt')
diff --git a/constants_vision_gpu_2.py b/constants_vision_gpu_2.py
index c19799a77654f0250e010c98b083580367aa63fe..aa195ba13d2ec704bef230a185a42b23a028c1f6 100644
--- a/constants_vision_gpu_2.py
+++ b/constants_vision_gpu_2.py
@@ -5,7 +5,7 @@ def mkdir_if_not_exists(dir_name):
     if not os.path.exists(dir_name):
         os.mkdir(dir_name)
         
-experiment_name = 'QA_explicit_dot_joint_training_pretrained_multi_rate'
+experiment_name = 'QA_explicit_dot_joint_training_pretrained_fix_pretrained'
 #experiment_name = 'object_attribute_classifier_large_images'
 # Global output directory (all subexperiments will be saved here)
 global_output_dir = '/data/tanmay/GenVQA_Exp_Results'
@@ -110,7 +110,7 @@ region_fine_tune_from_iter = 3000
 region_fine_tune_from = region_model + '-' + str(region_fine_tune_from_iter)
 
 # Object Attribute Classifier Evaluation Params
-region_eval_on = 'train' # One of {'val','test','train'}
+region_eval_on = 'val' # One of {'val','test','train'}
 region_model_to_eval = region_model + '-' + '77500'
 
 region_attribute_scores_dirname = os.path.join(
@@ -136,6 +136,12 @@ vqa_train_resnet_feat_dir = os.path.join(
 vqa_train_anno = os.path.join(
     vqa_basedir,
     'mscoco_train2014_annotations_with_parsed_questions.json')
+vqa_train_subset_qids = os.path.join(
+    vqa_basedir,
+    'train_subset_qids.json')
+vqa_train_held_out_qids = os.path.join(
+    vqa_basedir,
+    'train_held_out_qids.json') 
 
 vqa_val_image_dir = os.path.join(
     vqa_basedir,
@@ -146,6 +152,9 @@ vqa_val_resnet_feat_dir = os.path.join(
 vqa_val_anno = os.path.join(
     vqa_basedir,
     'mscoco_val2014_annotations_with_parsed_questions.json')
+vqa_val_qids = os.path.join(
+    vqa_basedir,
+    'val_qids.json')
 
 vqa_answer_vocab_json = os.path.join(
     vqa_basedir,
@@ -166,8 +175,7 @@ answer_obj_atr_loss_wt = 0.1
 answer_regularization_coeff = 1e-5
 answer_queue_size = 500
 answer_embedding_dim = 600
-obj_atr_lr = 1e-4
-answer_lr = 1e-3
+answer_lr = 1e-4
 answer_log_every_n_iter = 500
 answer_output_dir = os.path.join(
     global_experiment_dir,
@@ -187,12 +195,12 @@ answer_model = os.path.join(
 num_regions_with_labels = 100
 
 # Answer fine tune params
-answer_fine_tune_from_iter = 19500
+answer_fine_tune_from_iter = 18500
 answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter)
 
 # Answer eval params
 answer_eval_on = 'val'
-answer_model_to_eval = answer_model + '-13000'
+answer_model_to_eval = answer_model + '-18500'
 
 answer_eval_data_json = os.path.join(
     answer_output_dir,
diff --git a/object_attribute_classifier_cached_features/eval.py b/object_attribute_classifier_cached_features/eval.py
index 4d5cb0d751f0983a9ec863897191e59a8c1054a8..48ba6b9d826a070f8d0b3353e97adafc4c9a6313 100644
--- a/object_attribute_classifier_cached_features/eval.py
+++ b/object_attribute_classifier_cached_features/eval.py
@@ -232,7 +232,7 @@ def eval(
             eval_vars_dict = {
                 var_name: eval_var for var_name, eval_var in
                 zip(vars_to_eval_names, eval_vars)}
-           # print batch['region_ids']
+            # print batch['region_ids']
             labels = dict()
             labels['objects'] = batch['object_labels']
             labels['attributes'] = batch['attribute_labels']