From 3758e7a56659eb40623c64c86a26990315f535e4 Mon Sep 17 00:00:00 2001
From: tgupta6 <tgupta6@illinois.edu>
Date: Wed, 28 Sep 2016 20:24:29 -0500
Subject: [PATCH] Object-attribute classifier with MIL and answer losses working; add learning-rate decay

---
 .../inference.py                              | 70 ++++++++--------
 answer_classifier_cached_features/train.py    | 81 ++++++++++++++++---
 constants_crunchy.py                          |  8 +-
 .../inference.py                              | 27 ++++---
 4 files changed, 126 insertions(+), 60 deletions(-)

diff --git a/answer_classifier_cached_features/inference.py b/answer_classifier_cached_features/inference.py
index 4c0b777..7383463 100644
--- a/answer_classifier_cached_features/inference.py
+++ b/answer_classifier_cached_features/inference.py
@@ -51,7 +51,8 @@ class AnswerInference():
                     noun_embed,
                     adjective_embed)
             
-            self.per_region_answer_scores = [None]*self.batch_size
+#            self.per_region_answer_scores = [None]*self.batch_size
+            obj_atr_qa_feat = [None]*self.batch_size
             for j in xrange(self.batch_size):
                 if j==0:
                     reuse_vars = False
@@ -76,41 +77,42 @@ class AnswerInference():
                     a_feat,
                     [self.num_regions, 1, 1])
 
-                obj_atr_qa_feat = tf.concat(
+                obj_atr_qa_feat[j] = tf.concat(
                     2,
                     [self.selected_noun_adjective[j], q_feat, a_feat])
 
-                obj_atr_qa_feat = tf.expand_dims(
-                    obj_atr_qa_feat,
-                    0)
-
-                print obj_atr_qa_feat.get_shape()
+                # obj_atr_qa_feat[j] = tf.expand_dims(
+                #     obj_atr_qa_feat[j],
+                #     0)
 
-                self.per_region_answer_scores[j] = layers.conv2d(
-                    obj_atr_qa_feat,
-                    1,
-                    2500,
-                    'per_region_ans_score_conv_1',
-                    func = None,
-                    reuse_vars = reuse_vars)
+            obj_atr_qa_feat = tf.pack(obj_atr_qa_feat)
+            print obj_atr_qa_feat.get_shape()
 
-                self.per_region_answer_scores[j] = tf.nn.relu(
-                    layers.batch_norm(
-                        self.per_region_answer_scores[j],
-                        tf.constant(self.is_training),
-                        reuse_vars = reuse_vars))                
-
-                self.per_region_answer_scores[j] = layers.conv2d(
-                    self.per_region_answer_scores[j],
-                    1,
-                    1,
-                    'per_region_ans_score_conv_2',
-                    func = None,
-                    reuse_vars = reuse_vars)
+            self.per_region_answer_scores = layers.conv2d(
+                obj_atr_qa_feat,
+                1,
+                2500,
+                'per_region_ans_score_conv_1',
+                func = None)
+            
+            self.per_region_answer_scores = tf.nn.relu(
+                layers.batch_norm(
+                    self.per_region_answer_scores,
+                    tf.constant(self.is_training)))                
+
+            self.per_region_answer_scores = layers.conv2d(
+                self.per_region_answer_scores,
+                1,
+                1,
+                'per_region_ans_score_conv_2',
+                func = None)
                 
-                self.per_region_answer_scores[j] = tf.squeeze(
-                    self.per_region_answer_scores[j],
-                    [0,3])
+            print self.per_region_answer_scores.get_shape()
+            self.per_region_answer_scores = tf.squeeze(
+                self.per_region_answer_scores,
+                [3])
+            
+            self.per_region_answer_scores = tf.unpack(self.per_region_answer_scores)
 
             self.per_region_answer_prob = [None]*self.batch_size
             self.answer_score = [None]*self.batch_size
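
Note on the hunk above: the per-example loop no longer owns the scoring layers. A minimal standalone sketch of the pattern in the TF 0.x API this repo uses (tensor shapes and the 2500-unit width come from the diff; the raw tf.nn.conv2d calls stand in for the repo's layers.conv2d, whose exact signature is repo-specific):

import tensorflow as tf

def score_regions(per_example_feats, hidden_dim=2500):
    # per_example_feats: list (len batch_size) of [num_regions, 1, feat_dim]
    # tensors, as assembled by the loop above.
    x = tf.pack(per_example_feats)      # [batch, num_regions, 1, feat_dim]
    in_dim = x.get_shape().as_list()[-1]

    # 1x1 convolutions act as per-region fully connected layers, so one
    # variable set covers the whole batch and reuse_vars bookkeeping goes away.
    w1 = tf.get_variable('w1', [1, 1, in_dim, hidden_dim])
    h = tf.nn.relu(tf.nn.conv2d(x, w1, [1, 1, 1, 1], 'SAME'))  # BN elided

    w2 = tf.get_variable('w2', [1, 1, hidden_dim, 1])
    scores = tf.nn.conv2d(h, w2, [1, 1, 1, 1], 'SAME')  # [batch, R, 1, 1]

    scores = tf.squeeze(scores, [3])    # [batch, num_regions, 1]
    return tf.unpack(scores)            # back to a per-example list
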
@@ -139,13 +141,17 @@ class AnswerInference():
             scores2 = tf.nn.softmax(scores)
             feat2 = tf.matmul(scores2, adjective_embed[k])
 
-            scores1_ = tf.matmul(obj_feat, tf.transpose(feat1))
-            scores2_ = tf.matmul(atr_feat, tf.transpose(feat2))
+            scores1_ = tf.reduce_sum(obj_feat*feat1,1,keep_dims=True)
+            scores2_ = tf.reduce_sum(atr_feat*feat2,1,keep_dims=True)
+            # scores1_ = tf.matmul(obj_feat, tf.transpose(feat1))
+            # scores2_ = tf.matmul(atr_feat, tf.transpose(feat2))
 
             feat = tf.concat(1, [feat1, feat2, scores1_, scores2_])
 
             feats.append(feat)
 
+        print 'feat1 {}'.format(feat1.get_shape())
+        print 'scores1_ {}'.format(scores1_.get_shape())
         feats = tf.transpose(tf.pack(feats), [1,0,2])
 
         return feats
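
The reduce_sum change above swaps a pairwise product for a row-wise one: obj_feat and feat1 are both [num_regions, dim], and only matching rows should be dotted. A self-contained sketch (sizes are illustrative, not from the repo):

import tensorflow as tf

num_regions, dim = 22, 300  # illustrative sizes
obj_feat = tf.placeholder(tf.float32, [num_regions, dim])
feat1 = tf.placeholder(tf.float32, [num_regions, dim])

# Row-wise dot product of matching regions -> [num_regions, 1]:
scores1_ = tf.reduce_sum(obj_feat * feat1, 1, keep_dims=True)

# The replaced matmul form built every pairwise dot product instead, a
# [num_regions, num_regions] matrix whose diagonal is the tensor above:
all_pairs = tf.matmul(obj_feat, tf.transpose(feat1))
diag = tf.expand_dims(tf.diag_part(all_pairs), 1)  # equals scores1_

Besides being the intended quantity, the row-wise form keeps the concat below at a fixed [num_regions, 2*dim + 2] width instead of one that grows with num_regions.
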
diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py
index 8870246..3ce1c8e 100644
--- a/answer_classifier_cached_features/train.py
+++ b/answer_classifier_cached_features/train.py
@@ -383,7 +383,8 @@ class graph_creator():
                 self.answer_inference.answer_score[j],
                 y)
         self.answer_loss /= self.batch_size
-        
+        self.answer_loss *= self.ans_loss_wt
+
         if self.training:
             self.object_loss = self.obj_atr_loss_wt*losses.object_loss(
                 #self.obj_atr_inference.object_scores,
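
For orientation, the weights involved (values set in the constants_crunchy.py hunk further down; the summation into the total objective lives elsewhere in train.py and is sketched here, not quoted):

# Loss weights from constants_crunchy.py in this patch:
ans_loss_wt = 0.1      # answer_ans_loss_wt
obj_atr_loss_wt = 1.0  # answer_obj_atr_loss_wt
mil_loss_wt = 0.2      # answer_mil_loss_wt

# Each term is scaled where it is built (answer loss above, object/attribute
# and MIL losses below); the total is then a plain sum, illustratively:
# total_loss = answer_loss + object_loss + attribute_loss \
#            + mil_obj_loss + mil_atr_loss + regularization
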
@@ -417,7 +418,7 @@ class graph_creator():
 
                 self.mil_atr_loss += losses.mil_loss(
                     self.attribute_scores_with_answers[j],
-                    self.plh['positive_nouns_vec_enc'][j],
+                    self.plh['positive_attributes_vec_enc'][j],
                     'atr')
 
             self.mil_obj_loss = self.mil_loss_wt*self.mil_obj_loss / self.batch_size
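
The one-line fix above routes attribute targets (instead of the copy-pasted noun targets) into the attribute MIL term. losses.mil_loss itself is repo code; for readers unfamiliar with it, a minimal sketch of a standard multiple-instance objective it plausibly resembles, assuming max-pooling of per-region scores into a bag score (an assumption, not the repo's definition):

import tensorflow as tf

def mil_loss_sketch(region_scores, bag_labels):
    # region_scores: [num_regions, num_classes] raw scores for one image.
    # bag_labels:    [num_classes] 0/1 image-level targets.
    # Max over regions: the image scores high for a class if any region does.
    bag_scores = tf.reduce_max(region_scores, 0)
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(bag_scores, bag_labels))
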
@@ -469,6 +470,7 @@ class graph_creator():
 
     def add_accuracy_computation(self):
         with tf.variable_scope('accuracy_graph'):
+            # answer
             self.answer_accuracy, self.answer_accuracy_ema, \
                 self.update_answer_accuracy_op = \
                     self.add_answer_accuracy_computation(
@@ -481,6 +483,24 @@ class graph_creator():
                 answer_accuracy_summary = tf.scalar_summary(
                     ["accuracy_answer"],
                     self.moving_average_accuracy)
+
+            # object
+            self.object_accuracy = self.add_object_accuracy_computation(
+                self.object_scores_with_labels,
+                self.plh['object_labels'])
+
+            object_accuracy_summary = tf.scalar_summary(
+                "accuracy_object", 
+                self.object_accuracy)
+
+            # attributes
+            self.attribute_accuracy = self.add_attribute_accuracy_computation(
+                self.attribute_scores_with_labels,
+                self.plh['attribute_labels'])
+
+            attribute_accuracy_summary = tf.scalar_summary(
+                "accuracy_attribute", 
+                self.attribute_accuracy)
             
     def add_answer_accuracy_computation(self, scores):
         with tf.variable_scope('answer_accuracy'):
@@ -496,13 +516,44 @@ class graph_creator():
 
         return accuracy, ema, update_accuracy_op
 
+    def add_object_accuracy_computation(self, scores, labels):
+        with tf.variable_scope('object_accuracy'):
+            correct_prediction = tf.equal(
+                tf.argmax(scores, 1), 
+                tf.argmax(labels, 1), 
+                name='correct_prediction')
+
+            object_accuracy = tf.reduce_mean(
+                tf.cast(correct_prediction, tf.float32), 
+                name='accuracy')
+
+        return object_accuracy
+
+    def add_attribute_accuracy_computation(self, scores, labels):
+        with tf.variable_scope('attribute_accuracy'):
+            thresholded = tf.greater(
+                scores, 
+                0.0, 
+                name='thresholded')
+
+            correct_prediction = tf.equal(
+                thresholded,
+                tf.cast(labels, tf.bool),
+                name = 'correct_prediction')
+
+            attribute_accuracy = tf.reduce_mean(
+                tf.cast(correct_prediction, tf.float32), 
+                name='accuracy')
+
+        return attribute_accuracy
+
     def collect_variables(self):
         self.word_vec_vars = var_collect.collect_scope('word_vectors')
         self.resnet_vars = self.obj_atr_inference.resnet_vars
         self.object_attribute_vars = \
             var_collect.collect_scope('object_graph') + \
-            var_collect.collect_scope('attribute_graph') + \
-            var_collect.collect_scope('bn')
+            var_collect.collect_scope('attribute_graph')
+            #var_collect.collect_scope('bn')
         self.answer_vars = var_collect.collect_scope('answer_graph')
 
 def create_initializer(graph, sess, model):
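
The two new metrics above encode different label assumptions: object labels are one-hot (one class per region, so argmax accuracy), while attribute labels are multi-hot (any number per region, so each score is thresholded at 0 and compared elementwise). A toy check of both, with made-up shapes and values:

import numpy as np
import tensorflow as tf

scores = tf.placeholder(tf.float32, [None, 4])
labels = tf.placeholder(tf.float32, [None, 4])

# Single-label (object) accuracy: the top-scoring class must match.
obj_acc = tf.reduce_mean(tf.cast(
    tf.equal(tf.argmax(scores, 1), tf.argmax(labels, 1)), tf.float32))

# Multi-label (attribute) accuracy: per-entry sign agreement with 0/1 labels.
atr_acc = tf.reduce_mean(tf.cast(
    tf.equal(tf.greater(scores, 0.0), tf.cast(labels, tf.bool)), tf.float32))

with tf.Session() as sess:
    s = np.array([[2.0, -1.0, 0.5, -3.0]], dtype=np.float32)
    y = np.array([[1.0, 0.0, 1.0, 0.0]], dtype=np.float32)
    print sess.run([obj_acc, atr_acc], {scores: s, labels: y})  # [1.0, 1.0]

Note that the elementwise form also counts correct negatives, so it reads optimistically high when the attribute label matrix is sparse.
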
@@ -613,13 +664,15 @@ def create_batch_generator():
 
 
 class attach_optimizer():
-    def __init__(self, graph, lr):
+    def __init__(self, graph, lr, decay_step=24000, decay_rate=0.5):
         self.graph = graph
         self.lr = lr
+        self.decay_step = decay_step
+        self.decay_rate = decay_rate
         with graph.tf_graph.as_default():
             all_trainable_vars = tf.trainable_variables()
 
-            self.not_to_train = graph.object_attribute_vars + graph.word_vec_vars
+            self.not_to_train = []#graph.object_attribute_vars + graph.word_vec_vars
 
             vars_to_train = [
                 var for var in all_trainable_vars
@@ -632,18 +685,24 @@ class attach_optimizer():
 
             all_vars = tf.all_variables()
             self.ops = dict()
+
+            self.global_step = tf.Variable(0, trainable=False)
+            self.learning_rate = tf.train.exponential_decay(
+                self.lr,
+                self.global_step,
+                self.decay_step, self.decay_rate)
             
             self.optimizer = multi_rate_train.MultiRateOptimizer(
                 tf.train.AdamOptimizer)
 
-            self.optimizer.add_variables(
-                self.graph.object_attribute_vars + self.graph.word_vec_vars,
-                learning_rate = 1.0*self.lr)
+            # self.optimizer.add_variables(
+            #     self.graph.object_attribute_vars + self.graph.word_vec_vars,
+            #     learning_rate = 1.0*self.lr)
 
             
             self.optimizer.add_variables(
                 vars_to_train,
-                learning_rate = self.lr)
+                learning_rate = self.learning_rate)
 
             self.train_op = self.optimizer.minimize(graph.total_loss)
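
One caveat with the decay schedule above: tf.train.exponential_decay only moves if global_step is actually incremented, which the stock optimizers do when the variable is passed to minimize; whether MultiRateOptimizer forwards it is repo-specific and worth verifying. A minimal sketch with plain Adam (decay_step=24000 and decay_rate=0.5 are the defaults added above):

import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    1e-3,            # base lr (answer_lr in constants_crunchy.py)
    global_step,
    24000,           # decay_step: halve roughly every 24k updates ...
    0.5,             # ... since decay_rate is 0.5
    staircase=True)  # illustrative; the default is a smooth decay

w = tf.Variable(1.0)
loss = tf.square(w)  # stands in for graph.total_loss

# Passing global_step here is what advances the schedule on each update.
train_op = tf.train.AdamOptimizer(learning_rate).minimize(
    loss, global_step=global_step)
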
                 
@@ -794,7 +853,7 @@ class log_mgr():
                 eval_vars_dict['per_region_answer_prob'].shape)
             print np.max(eval_vars_dict['per_region_answer_prob'])
 
-        if iter % self.log_every_n_iter==0 or is_last:
+        if (iter % self.log_every_n_iter==0 or is_last) and (iter!=0):
             self.model_saver.save(
                 self.sess, 
                 self.model_path, 
diff --git a/constants_crunchy.py b/constants_crunchy.py
index d3864c9..b201bd0 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -101,7 +101,7 @@ pretrained_vocab_word_vectors_npy = os.path.join(
 
 # Object Attribute Classifier Training Params
 region_batch_size = 200
-region_num_epochs = 6
+region_num_epochs = 20
 region_queue_size = 400
 region_regularization_coeff = 1e-5
 region_lr = 1e-3
@@ -188,8 +188,8 @@ vqa_answer_vocab_json = os.path.join(
 # num_test_questions = 0
 
 # Answer classifier training params
-answer_batch_size = 50
-answer_num_epochs = 6
+answer_batch_size = 25
+answer_num_epochs = 20
 answer_offset = 0
 answer_obj_atr_loss_wt = 1.0
 answer_ans_loss_wt = 0.1
@@ -197,7 +197,7 @@ answer_mil_loss_wt = 0.2
 answer_regularization_coeff = 1e-5
 answer_queue_size = 500
 answer_embedding_dim = 600
-answer_lr = 1.0*1e-3
+answer_lr = 1e-3
 answer_log_every_n_iter = 500
 answer_train_from_scratch = True
 answer_output_dir = os.path.join(
diff --git a/object_attribute_classifier_cached_features/inference.py b/object_attribute_classifier_cached_features/inference.py
index f19a9ae..8a1887f 100644
--- a/object_attribute_classifier_cached_features/inference.py
+++ b/object_attribute_classifier_cached_features/inference.py
@@ -18,14 +18,15 @@ class ObjectAttributeInference():
             training):
 
         self.image_feats = image_feats
+
         self.object_label_vectors = object_label_vectors
         self.attribute_label_vectors = attribute_label_vectors
         self.training = training
         self.avg_pool_feat = self.image_feats
 
-        self.avg_pool_feat = layers.batch_norm(
-            self.avg_pool_feat,
-            tf.constant(self.training))
+        # self.avg_pool_feat = layers.batch_norm(
+        #     self.avg_pool_feat,
+        #     tf.constant(self.training))
 
         self.resnet_vars = self.get_resnet_vars()
 
@@ -58,7 +59,7 @@ class ObjectAttributeInference():
         with tf.variable_scope('object_graph') as object_graph:
             with tf.variable_scope('fc1') as fc1:
                 in_dim = input.get_shape().as_list()[-1]
-                out_dim = in_dim/2
+                out_dim = in_dim
                 fc1_out = layers.full(
                     input,
                     out_dim,
@@ -77,10 +78,10 @@ class ObjectAttributeInference():
                     out_dim,
                     'fc',
                     func = None)
-                fc2_out = layers.batch_norm(
-                    fc2_out,
-                    tf.constant(self.training))
-                fc2_out = tf.nn.relu(fc2_out)
+                # fc2_out = layers.batch_norm(
+                #     fc2_out,
+                #     tf.constant(self.training))
+                # fc2_out = tf.nn.relu(fc2_out)
 
         return fc2_out
 
@@ -88,7 +89,7 @@ class ObjectAttributeInference():
         with tf.variable_scope('attribute_graph') as attribute_graph:
             with tf.variable_scope('fc1') as fc1:
                 in_dim = input.get_shape().as_list()[-1]
-                out_dim = in_dim/2
+                out_dim = in_dim
                 fc1_out = layers.full(
                     input,
                     out_dim,
@@ -107,10 +108,10 @@ class ObjectAttributeInference():
                     out_dim,
                     'fc',
                     func = None)
-                fc2_out = layers.batch_norm(
-                    fc2_out,
-                    tf.constant(self.training))
-                fc2_out = tf.nn.relu(fc2_out)
+                # fc2_out = layers.batch_norm(
+                #     fc2_out,
+                #     tf.constant(self.training))
+                # fc2_out = tf.nn.relu(fc2_out)
         
         return fc2_out
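
After this patch both heads share one shape: fc1 keeps its input width (in_dim rather than in_dim/2) with batch norm and ReLU, and fc2 is purely linear, so downstream losses and the new thresholded accuracy see unbounded logits. A compressed sketch, with the repo's layers.full replaced by a plain dense layer for self-containment (an assumption about what layers.full does):

import tensorflow as tf

def dense(x, out_dim, name):
    # Stand-in for the repo's layers.full, assumed to be a plain FC layer.
    in_dim = x.get_shape().as_list()[-1]
    with tf.variable_scope(name):
        w = tf.get_variable('w', [in_dim, out_dim])
        b = tf.get_variable('b', [out_dim])
        return tf.matmul(x, w) + b

def head(feat, out_dim):
    # fc1: width preserved (in_dim -> in_dim); ReLU kept, batch norm elided
    # here for brevity (the patch keeps it on fc1, removes it on fc2).
    in_dim = feat.get_shape().as_list()[-1]
    h = tf.nn.relu(dense(feat, in_dim, 'fc1'))
    # fc2: now a pure linear map -- no batch norm, no ReLU -- raw logits out.
    return dense(h, out_dim, 'fc2')
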
     
-- 
GitLab