From 3758e7a56659eb40623c64c86a26990315f535e4 Mon Sep 17 00:00:00 2001
From: tgupta6 <tgupta6@illinois.edu>
Date: Wed, 28 Sep 2016 20:24:29 -0500
Subject: [PATCH] Object-attribute classifier with MIL and answer losses
 working; add learning rate decay

Train the answer classifier jointly with the object/attribute MIL losses,
scale the answer loss by ans_loss_wt, add object and attribute accuracy
summaries, and decay the learning rate exponentially during training.
---
 .../inference.py                           | 70 ++++++++--------
 answer_classifier_cached_features/train.py | 81 ++++++++++++++++---
 constants_crunchy.py                       |  8 +-
 .../inference.py                           | 27 ++++---
 4 files changed, 126 insertions(+), 60 deletions(-)

diff --git a/answer_classifier_cached_features/inference.py b/answer_classifier_cached_features/inference.py
index 4c0b777..7383463 100644
--- a/answer_classifier_cached_features/inference.py
+++ b/answer_classifier_cached_features/inference.py
@@ -51,7 +51,8 @@ class AnswerInference():
             noun_embed,
             adjective_embed)
 
-        self.per_region_answer_scores = [None]*self.batch_size
+#        self.per_region_answer_scores = [None]*self.batch_size
+        obj_atr_qa_feat = [None]*self.batch_size
         for j in xrange(self.batch_size):
             if j==0:
                 reuse_vars = False
@@ -76,41 +77,42 @@ class AnswerInference():
                 a_feat,
                 [self.num_regions, 1, 1])
 
-            obj_atr_qa_feat = tf.concat(
+            obj_atr_qa_feat[j] = tf.concat(
                 2,
                 [self.selected_noun_adjective[j], q_feat, a_feat])
 
-            obj_atr_qa_feat = tf.expand_dims(
-                obj_atr_qa_feat,
-                0)
-
-            print obj_atr_qa_feat.get_shape()
+            # obj_atr_qa_feat[j] = tf.expand_dims(
+            #     obj_atr_qa_feat[j],
+            #     0)
 
-            self.per_region_answer_scores[j] = layers.conv2d(
-                obj_atr_qa_feat,
-                1,
-                2500,
-                'per_region_ans_score_conv_1',
-                func = None,
-                reuse_vars = reuse_vars)
+        obj_atr_qa_feat = tf.pack(obj_atr_qa_feat)
+        print obj_atr_qa_feat.get_shape()
 
-            self.per_region_answer_scores[j] = tf.nn.relu(
-                layers.batch_norm(
-                    self.per_region_answer_scores[j],
-                    tf.constant(self.is_training),
-                    reuse_vars = reuse_vars))
-
-            self.per_region_answer_scores[j] = layers.conv2d(
-                self.per_region_answer_scores[j],
-                1,
-                1,
-                'per_region_ans_score_conv_2',
-                func = None,
-                reuse_vars = reuse_vars)
+        self.per_region_answer_scores = layers.conv2d(
+            obj_atr_qa_feat,
+            1,
+            2500,
+            'per_region_ans_score_conv_1',
+            func = None)
+
+        self.per_region_answer_scores = tf.nn.relu(
+            layers.batch_norm(
+                self.per_region_answer_scores,
+                tf.constant(self.is_training)))
+
+        self.per_region_answer_scores = layers.conv2d(
+            self.per_region_answer_scores,
+            1,
+            1,
+            'per_region_ans_score_conv_2',
+            func = None)
 
-            self.per_region_answer_scores[j] = tf.squeeze(
-                self.per_region_answer_scores[j],
-                [0,3])
+        print self.per_region_answer_scores.get_shape()
+        self.per_region_answer_scores = tf.squeeze(
+            self.per_region_answer_scores,
+            [3])
+
+        self.per_region_answer_scores = tf.unpack(self.per_region_answer_scores)
 
         self.per_region_answer_prob = [None]*self.batch_size
         self.answer_score = [None]*self.batch_size
@@ -139,13 +141,17 @@ class AnswerInference():
                 scores2 = tf.nn.softmax(scores)
                 feat2 = tf.matmul(scores2, adjective_embed[k])
 
-                scores1_ = tf.matmul(obj_feat, tf.transpose(feat1))
-                scores2_ = tf.matmul(atr_feat, tf.transpose(feat2))
+                scores1_ = tf.reduce_sum(obj_feat*feat1,1,keep_dims=True)
+                scores2_ = tf.reduce_sum(atr_feat*feat2,1,keep_dims=True)
+                # scores1_ = tf.matmul(obj_feat, tf.transpose(feat1))
+                # scores2_ = tf.matmul(atr_feat, tf.transpose(feat2))
 
                 feat = tf.concat(1, [feat1, feat2, scores1_, scores2_])
                 feats.append(feat)
 
+            print 'feat1 {}'.format(feat1.get_shape())
+            print 'scores1_ {}'.format(scores1_.get_shape())
             feats = tf.transpose(tf.pack(feats), [1,0,2])
             return feats
diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py
index 8870246..3ce1c8e 100644
--- a/answer_classifier_cached_features/train.py
+++ b/answer_classifier_cached_features/train.py
@@ -383,7 +383,8 @@ class graph_creator():
                 self.answer_inference.answer_score[j],
                 y)
         self.answer_loss /= self.batch_size
-
+        self.answer_loss *= self.ans_loss_wt
+
         if self.training:
             self.object_loss = self.obj_atr_loss_wt*losses.object_loss(
                 #self.obj_atr_inference.object_scores,
@@ -417,7 +418,7 @@
             self.mil_atr_loss += losses.mil_loss(
                 self.attribute_scores_with_answers[j],
-                self.plh['positive_nouns_vec_enc'][j],
+                self.plh['positive_attributes_vec_enc'][j],
                 'atr')
 
         self.mil_obj_loss = self.mil_loss_wt*self.mil_obj_loss / self.batch_size
@@ -469,6 +470,7 @@ class graph_creator():
     def add_accuracy_computation(self):
         with tf.variable_scope('accuracy_graph'):
+            # answer
             self.answer_accuracy, self.answer_accuracy_ema, \
                 self.update_answer_accuracy_op = \
                 self.add_answer_accuracy_computation(
@@ -481,6 +483,24 @@
             answer_accuracy_summary = tf.scalar_summary(
                 ["accuracy_answer"],
                 self.moving_average_accuracy)
+
+            # object
+            self.object_accuracy = self.add_object_accuracy_computation(
+                self.object_scores_with_labels,
+                self.plh['object_labels'])
+
+            object_accuracy_summary = tf.scalar_summary(
+                "accuracy_object",
+                self.object_accuracy)
+
+            # attributes
+            self.attribute_accuracy = self.add_attribute_accuracy_computation(
+                self.attribute_scores_with_labels,
+                self.plh['attribute_labels'])
+
+            attribute_accuracy_summary = tf.scalar_summary(
+                "accuracy_attribute",
+                self.attribute_accuracy)
 
     def add_answer_accuracy_computation(self, scores):
         with tf.variable_scope('answer_accuracy'):
@@ -496,13 +516,44 @@
         return accuracy, ema, update_accuracy_op
 
+    def add_object_accuracy_computation(self, scores, labels):
+        with tf.variable_scope('object_accuracy'):
+            correct_prediction = tf.equal(
+                tf.argmax(scores, 1),
+                tf.argmax(labels, 1),
+                name='correct_prediction')
+
+            object_accuracy = tf.reduce_mean(
+                tf.cast(correct_prediction, tf.float32),
+                name='accuracy')
+
+        return object_accuracy
+
+    def add_attribute_accuracy_computation(self, scores, labels):
+        with tf.variable_scope('attribute_accuracy'):
+            thresholded = tf.greater(
+                scores,
+                0.0,
+                name='thresholded')
+
+            correct_prediction = tf.equal(
+                thresholded,
+                tf.cast(labels, tf.bool),
+                name = 'correct_prediction')
+
+            attribute_accuracy = tf.reduce_mean(
+                tf.cast(correct_prediction, tf.float32),
+                name='accuracy')
+
+        return attribute_accuracy
+
     def collect_variables(self):
         self.word_vec_vars = var_collect.collect_scope('word_vectors')
         self.resnet_vars = self.obj_atr_inference.resnet_vars
         self.object_attribute_vars = \
             var_collect.collect_scope('object_graph') + \
-            var_collect.collect_scope('attribute_graph') + \
-            var_collect.collect_scope('bn')
+            var_collect.collect_scope('attribute_graph')
+            #var_collect.collect_scope('bn')
         self.answer_vars = var_collect.collect_scope('answer_graph')
 
 def create_initializer(graph, sess, model):
@@ -613,13 +664,15 @@ def create_batch_generator():
 
 class attach_optimizer():
-    def __init__(self, graph, lr):
+    def __init__(self, graph, lr, decay_step=24000, decay_rate=0.5):
         self.graph = graph
         self.lr = lr
+        self.decay_step = decay_step
+        self.decay_rate = decay_rate
 
         with graph.tf_graph.as_default():
             all_trainable_vars = tf.trainable_variables()
 
-            self.not_to_train = graph.object_attribute_vars + graph.word_vec_vars
+            self.not_to_train = []#graph.object_attribute_vars + graph.word_vec_vars
 
             vars_to_train = [
                 var for var in all_trainable_vars
@@ -632,18 +685,24 @@ class attach_optimizer():
             all_vars = tf.all_variables()
 
             self.ops = dict()
+
+            self.global_step = tf.Variable(0, trainable=False)
+            self.learning_rate = tf.train.exponential_decay(
+                self.lr,
+                self.global_step,
+                self.decay_step, self.decay_rate)
 
             self.optimizer = multi_rate_train.MultiRateOptimizer(
                 tf.train.AdamOptimizer)
 
-            self.optimizer.add_variables(
-                self.graph.object_attribute_vars + self.graph.word_vec_vars,
-                learning_rate = 1.0*self.lr)
+            # self.optimizer.add_variables(
+            #     self.graph.object_attribute_vars + self.graph.word_vec_vars,
+            #     learning_rate = 1.0*self.lr)
 
             self.optimizer.add_variables(
                 vars_to_train,
-                learning_rate = self.lr)
+                learning_rate = self.learning_rate)
 
             self.train_op = self.optimizer.minimize(graph.total_loss)
@@ -794,7 +853,7 @@ class log_mgr():
                     eval_vars_dict['per_region_answer_prob'].shape)
                 print np.max(eval_vars_dict['per_region_answer_prob'])
 
-        if iter % self.log_every_n_iter==0 or is_last:
+        if (iter % self.log_every_n_iter==0 or is_last) and (iter!=0):
             self.model_saver.save(
                 self.sess,
                 self.model_path,
diff --git a/constants_crunchy.py b/constants_crunchy.py
index d3864c9..b201bd0 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -101,7 +101,7 @@ pretrained_vocab_word_vectors_npy = os.path.join(
 
 # Object Attribute Classifier Training Params
 region_batch_size = 200
-region_num_epochs = 6
+region_num_epochs = 20
 region_queue_size = 400
 region_regularization_coeff = 1e-5
 region_lr = 1e-3
@@ -188,8 +188,8 @@ vqa_answer_vocab_json = os.path.join(
 # num_test_questions = 0
 
 # Answer classifier training params
-answer_batch_size = 50
-answer_num_epochs = 6
+answer_batch_size = 25
+answer_num_epochs = 20
 answer_offset = 0
 answer_obj_atr_loss_wt = 1.0
 answer_ans_loss_wt = 0.1
@@ -197,7 +197,7 @@ answer_mil_loss_wt = 0.2
 answer_regularization_coeff = 1e-5
 answer_queue_size = 500
 answer_embedding_dim = 600
-answer_lr = 1.0*1e-3
+answer_lr = 1e-3
 answer_log_every_n_iter = 500
 answer_train_from_scratch = True
 answer_output_dir = os.path.join(
diff --git a/object_attribute_classifier_cached_features/inference.py b/object_attribute_classifier_cached_features/inference.py
index f19a9ae..8a1887f 100644
--- a/object_attribute_classifier_cached_features/inference.py
+++ b/object_attribute_classifier_cached_features/inference.py
@@ -18,14 +18,15 @@ class ObjectAttributeInference():
             training):
 
         self.image_feats = image_feats
+        self.object_label_vectors = object_label_vectors
         self.attribute_label_vectors = attribute_label_vectors
         self.training = training
 
         self.avg_pool_feat = self.image_feats
-        self.avg_pool_feat = layers.batch_norm(
-            self.avg_pool_feat,
-            tf.constant(self.training))
+        # self.avg_pool_feat = layers.batch_norm(
+        #     self.avg_pool_feat,
+        #     tf.constant(self.training))
 
         self.resnet_vars = self.get_resnet_vars()
@@ -58,7 +59,7 @@
         with tf.variable_scope('object_graph') as object_graph:
             with tf.variable_scope('fc1') as fc1:
                 in_dim = input.get_shape().as_list()[-1]
-                out_dim = in_dim/2
+                out_dim = in_dim
                 fc1_out = layers.full(
                     input,
                     out_dim,
@@ -77,10 +78,10 @@
                     out_dim,
                     'fc',
                     func = None)
-                fc2_out = layers.batch_norm(
-                    fc2_out,
-                    tf.constant(self.training))
-                fc2_out = tf.nn.relu(fc2_out)
+                # fc2_out = layers.batch_norm(
+                #     fc2_out,
+                #     tf.constant(self.training))
+                # fc2_out = tf.nn.relu(fc2_out)
 
         return fc2_out
@@ -88,7 +89,7 @@
         with tf.variable_scope('attribute_graph') as attribute_graph:
             with tf.variable_scope('fc1') as fc1:
                 in_dim = input.get_shape().as_list()[-1]
-                out_dim = in_dim/2
+                out_dim = in_dim
                 fc1_out = layers.full(
                     input,
                     out_dim,
@@ -107,10 +108,10 @@
                     out_dim,
                     'fc',
                     func = None)
-                fc2_out = layers.batch_norm(
-                    fc2_out,
-                    tf.constant(self.training))
-                fc2_out = tf.nn.relu(fc2_out)
+                # fc2_out = layers.batch_norm(
+                #     fc2_out,
+                #     tf.constant(self.training))
+                # fc2_out = tf.nn.relu(fc2_out)
 
         return fc2_out
-- 
GitLab