From 5b71bdb802d18c457bf811ace5dba4ea45e858d9 Mon Sep 17 00:00:00 2001
From: tgupta6 <tgupta6@illinois.edu>
Date: Mon, 2 May 2016 10:24:00 -0500
Subject: [PATCH] relevance network with explicit features, ans network with
 margin loss and wordvec sharing

---
 .../answer_classifier/ans_data_io_helper.py   |  43 ++-
 .../answer_classifier/eval_ans_classifier.py  |  90 +++---
 .../answer_classifier/train_ans_classifier.py |  97 +++---
 classifiers/inherit_example.py                |  14 +
 .../region_ranker/eval_rel_classifier.py      |  28 +-
 .../region_ranker/train_rel_classifier.py     |  92 +++---
 classifiers/tf_graph_creation_helper.py       | 289 +++++++++++++++---
 classifiers/train_classifiers.py              |  32 +-
 8 files changed, 493 insertions(+), 192 deletions(-)
 create mode 100644 classifiers/inherit_example.py

diff --git a/classifiers/answer_classifier/ans_data_io_helper.py b/classifiers/answer_classifier/ans_data_io_helper.py
index 50ace34..d5e419b 100644
--- a/classifiers/answer_classifier/ans_data_io_helper.py
+++ b/classifiers/answer_classifier/ans_data_io_helper.py
@@ -81,6 +81,17 @@ def get_vocab(qa_dict):
     return vocab, inv_vocab
 
 
+def join_vocab(vocab, ans_vocab):
+    joint_vocab = vocab.copy()
+    count = len(joint_vocab)
+    for word in ans_vocab.keys():
+        if word not in joint_vocab:
+            joint_vocab[word] = count
+            count += 1
+
+    return joint_vocab
+
+
 def save_regions(image_dir, out_dir, qa_dict, region_anno_dict, start_id, 
                  batch_size, img_width, img_height):
     
@@ -91,7 +102,7 @@ def save_regions(image_dir, out_dir, qa_dict, region_anno_dict, start_id,
     region_shape = np.array([img_height/3, img_width/3], np.int32)
 
     image_done = dict()
-    for i in xrange(batch_size):
+    for i in xrange(start_id, start_id + batch_size):
         image_id = qa_dict[i].image_id
         image_done[image_id] = False
 
@@ -228,9 +239,9 @@ atr_labels = {
 }
 
 
-class feed_dict_creator():
-    def __init__(self, region_images, ans_labels, parsed_q, 
-                 region_score, keep_prob, plholder_dict, vocab):
+class FeedDictCreator():
+    def __init__(self, region_images, parsed_q,
+                 keep_prob, plholder_dict, vocab):
         self.plholder_dict = plholder_dict
         self.parsed_q = parsed_q
         self.vocab = vocab
@@ -238,8 +249,6 @@ class feed_dict_creator():
         self.feed_dict = {
             plholder_dict['image_regions']: region_images,
             plholder_dict['keep_prob']: keep_prob,
-            plholder_dict['gt_answer']: ans_labels,
-            plholder_dict['region_score']: region_score,
         }
         self.add_bin('bin0')
         self.add_bin('bin1')
@@ -281,13 +290,31 @@ class feed_dict_creator():
         containment = np.zeros([num_q, num_labels], dtype='float32')
         for q_num in xrange(num_q):
             for i, label in labels.items():
-                if label in [pq.lower() for pq in self.parsed_q[q_num][bin_name]]:
+                if label in [pq.lower() for pq in \
+                             self.parsed_q[q_num][bin_name]]:
                     containment[q_num,i] = 1
 
         plholder = self.plholder_dict[bin_name + '_' + \
                                       label_type + '_' + 'cont']
         self.feed_dict[plholder] = containment
-        
+
+
+class RelFeedDictCreator(FeedDictCreator):
+    def __init__(self, region_images, parsed_q, 
+                 gt_region_scores, keep_prob, plholder_dict, vocab):
+        FeedDictCreator.__init__(self, region_images, parsed_q,
+                                 keep_prob, plholder_dict, vocab)
+        self.feed_dict[plholder_dict['gt_scores']] = gt_region_scores
+    
+
+class AnsFeedDictCreator(FeedDictCreator):
+    def __init__(self, region_images, ans_labels, parsed_q, 
+                 region_scores, keep_prob, plholder_dict, vocab):
+        FeedDictCreator.__init__(self, region_images, parsed_q,
+                                 keep_prob, plholder_dict, vocab)
+        self.feed_dict[plholder_dict['gt_answer']] = ans_labels
+        self.feed_dict[plholder_dict['region_score']] = region_scores
+
 
 class html_ans_table_writer():
     def __init__(self, filename):
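For orientation, a self-contained sketch of the sparse question encoding that the add_bin calls above must produce for the binN_indices / binN_values / binN_shape placeholders (the '<unk>' fallback and the exact padding are assumptions for illustration, not the repo's code):

    vocab = {'<unk>': 0, 'red': 1, 'square': 2}
    parsed_q = {0: {'bin0': ['red', 'square']}, 1: {'bin0': ['square']}}

    indices, values = [], []
    for q_num in range(len(parsed_q)):
        for col, word in enumerate(parsed_q[q_num]['bin0']):
            indices.append([q_num, col])          # row = question, col = token slot
            values.append(vocab.get(word.lower(), vocab['<unk>']))
    shape = [len(parsed_q), max(len(q['bin0']) for q in parsed_q.values())]
    print(indices)  # [[0, 0], [0, 1], [1, 0]]
    print(values)   # [1, 2, 2]
    print(shape)    # [2, 2] -> feeds bin0_shape
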
diff --git a/classifiers/answer_classifier/eval_ans_classifier.py b/classifiers/answer_classifier/eval_ans_classifier.py
index f0c6605..248336b 100644
--- a/classifiers/answer_classifier/eval_ans_classifier.py
+++ b/classifiers/answer_classifier/eval_ans_classifier.py
@@ -15,9 +15,9 @@ import region_ranker.perfect_ranker as region_proposer
 import train_ans_classifier as ans_trainer
 from PIL import Image, ImageDraw
 
-def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
+def get_pred(y, qa_anno_dict, region_anno_dict, parsed_q_dict, ans_vocab, vocab,
              image_dir, mean_image, start_index, val_set_size, batch_size,
-             placeholders, img_height, img_width, batch_creator):
+             plholder_dict, img_height, img_width, batch_creator):
 
     inv_ans_vocab = {v: k for k, v in ans_vocab.items()}
     pred_list = []
@@ -30,14 +30,14 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
 
         print('Iter: ' + str(i+1) + '/' + str(max_iter))
 
-        region_images, ans_labels, questions, \
+        region_images, ans_labels, parsed_q, \
         region_score, partition = batch_creator \
             .ans_mini_batch_loader(qa_anno_dict, 
                                    region_anno_dict, 
                                    ans_vocab, vocab, 
                                    image_dir, mean_image, 
                                    start_index+i*batch_size, 
-                                   batch_size_tmp, 
+                                   batch_size_tmp, parsed_q_dict,
                                    img_height, img_width, 3)
             
         if i==max_iter-1:
@@ -48,8 +48,9 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
             residual_region_images = np.zeros(shape=[residual_regions,
                                                      img_height/3, img_width/3, 
                                                      3])
-            residual_questions = np.zeros(shape=[residual_regions, 
-                                                 len(vocab)])
             residual_ans_labels = np.zeros(shape=[residual_batch_size, 
                                                   len(ans_vocab)])
             residual_region_score = np.zeros(shape=[1, residual_regions])
@@ -57,19 +58,29 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
             region_images = np.concatenate((region_images, 
                                             residual_region_images),
                                            axis=0)
-            questions = np.concatenate((questions, residual_questions), axis=0)
+            # pad parsed_q with empty bins for the zero-filled residual
+            # regions (22 entries per question here)
+            for k in xrange(batch_size_tmp*22, batch_size*22):
+                parsed_q[k] = {
+                    'bin0': [''],
+                    'bin1': [''],
+                    'bin2': [''],
+                    'bin3': [''],
+                }
+
             ans_labels = np.concatenate((ans_labels, residual_ans_labels), 
                                         axis=0)
             region_score = np.concatenate((region_score, residual_region_score),
                                           axis=1)
 
-        feed_dict = {
-            placeholders[0] : region_images, 
-            placeholders[1] : questions,
-            placeholders[2] : 1.0,
-            placeholders[3] : ans_labels,        
-            placeholders[4] : region_score,
-        }
+        
+        feed_dict = ans_io_helper \
+            .AnsFeedDictCreator(region_images, 
+                                ans_labels, 
+                                parsed_q,
+                                region_score,
+                                1.0, 
+                                plholder_dict,
+                                vocab).feed_dict
 
         ans_ids = np.argmax(y.eval(feed_dict), 1)
         for j in xrange(batch_size_tmp):
@@ -78,13 +89,6 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
                 'answer' : inv_ans_vocab[ans_ids[j]]
             }]
 
-        # g = tf.get_default_graph()
-        # q_feat_op = g.get_operation_by_name('ans/word_embed/q_feat')
-        # q_feat = q_feat_op.outputs[0]
-        # region_feat_op = g.get_operation_by_name('ans/conv2/region_feat')
-        # region_feat = region_feat_op.outputs[0]
-        # pdb.set_trace()
-
     return pred_list
 
 def eval(eval_params):
@@ -92,6 +96,7 @@ def eval(eval_params):
     
     train_anno_filename = eval_params['train_json']
     test_anno_filename = eval_params['test_json']
+    parsed_q_filename = eval_params['parsed_q_json']
     regions_anno_filename = eval_params['regions_json']
     image_regions_dir = eval_params['image_regions_dir']
     outdir = eval_params['outdir']
@@ -104,38 +109,47 @@ def eval(eval_params):
 
     qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
     qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
+    parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
     region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
     ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
     vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)
 
     # Create graph
     g = tf.get_default_graph()
-    image_regions, questions, keep_prob, y, region_score= \
-        graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab), 
-                                             mode='gt')
-    
+    plholder_dict = graph_creator.placeholder_inputs_ans(len(vocab), 
+                                                         len(ans_vocab), 
+                                                         mode='gt')
+
+    image_regions = plholder_dict['image_regions']
+    questions = plholder_dict['questions']
+    keep_prob = plholder_dict['keep_prob']
+    y = plholder_dict['gt_answer']
+    region_score = plholder_dict['region_score']
+
     y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
     obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
     obj_feat = obj_feat_op.outputs[0]
     y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
     atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
     atr_feat = atr_feat_op.outputs[0]
-    pred_rel_score = graph_creator.rel_comp_graph(image_regions, questions,
+    pred_rel_score = graph_creator.rel_comp_graph(plholder_dict,
+                                                  obj_feat, atr_feat,
                                                   y_pred_obj, y_pred_atr,
-                                                  'q_obj_atr_reg',
+                                                  'q_obj_atr_reg_explt',
                                                   1.0, len(vocab), batch_size) 
-    y_pred = graph_creator.ans_comp_graph(image_regions, questions, keep_prob, 
-                                          obj_feat, atr_feat, vocab, 
-                                          inv_vocab, len(ans_vocab), 
-                                          eval_params['mode'])
+    y_pred = graph_creator.ans_comp_margin_graph(plholder_dict, 
+                                                 obj_feat, atr_feat, 
+                                                 y_pred_obj, y_pred_atr,
+                                                 vocab, inv_vocab, ans_vocab, 
+                                                 eval_params['mode'])
     pred_rel_score_vec = tf.reshape(pred_rel_score, 
                                     [1, batch_size*ans_io_helper.num_proposals])
+
     y_avg = graph_creator.aggregate_y_pred(y_pred, pred_rel_score_vec, 
                                            batch_size,  
                                            ans_io_helper.num_proposals, 
                                            len(ans_vocab))
     
-    cross_entropy = graph_creator.loss(y, y_avg)
     accuracy = graph_creator.evaluation(y, y_avg)
 
     # Collect variables
@@ -160,16 +174,15 @@ def eval(eval_params):
     mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
                          'Obj_Classifier/mean_image.npy')
 
-    placeholders = [image_regions, questions, keep_prob, y, region_score]
-
     # Batch creator
     test_batch_creator = ans_io_helper.batch_creator(test_start_id,
                                                      test_start_id 
                                                      + test_set_size - 1)
     # Get predictions
-    pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict, ans_vocab, 
+    pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict, 
+                         parsed_q_dict, ans_vocab, 
                          vocab, image_regions_dir, mean_image, test_start_id, 
-                         test_set_size, batch_size, placeholders, 75, 75,
+                         test_set_size, batch_size, plholder_dict, 75, 75,
                          test_batch_creator)
 
     json_filename = os.path.join(outdir, 'predicted_ans_' + \
@@ -271,11 +284,12 @@ if __name__=='__main__':
         'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
         'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
         'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
+        'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
         'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
         'image_regions_dir': '/mnt/ramdisk/image_regions',
-        'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_w_Rel',
-        'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob/rel_classifier_q_obj_atr_reg-4',
-        'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_w_Rel/ans_classifier_' + mode + '-' + str(model_num),
+        'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin',
+        'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9',
+        'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin/ans_classifier_' + mode + '-' + str(model_num),
         'mode' : mode,
         'batch_size': 20,
         'test_start_id': 94645,
diff --git a/classifiers/answer_classifier/train_ans_classifier.py b/classifiers/answer_classifier/train_ans_classifier.py
index d1dcc3b..f57515c 100644
--- a/classifiers/answer_classifier/train_ans_classifier.py
+++ b/classifiers/answer_classifier/train_ans_classifier.py
@@ -31,9 +31,12 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
         'ans/fc1/W_obj',
         'ans/fc1/W_atr',
         'ans/fc1/W_q',
+        'ans/fc1/W_explt',
         'ans/fc1/b',
-        'ans/fc2/W',
-        'ans/fc2/b'
+        'ans/fc2/W_feat',
+        'ans/fc2/b_feat',
+        'ans/fc2/W_ans',
+        'ans/fc2/b_ans'
     ]
 
     vars_dict = graph_creator.get_list_of_variables(list_of_vars)
@@ -43,8 +46,10 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
         vars_dict['ans/word_embed/word_vecs'],
         vars_dict['ans/fc1/W_q'],
         vars_dict['ans/fc1/b'],
-        vars_dict['ans/fc2/W'],
-        vars_dict['ans/fc2/b'],
+        vars_dict['ans/fc2/W_feat'],
+        vars_dict['ans/fc2/b_feat'],
+        vars_dict['ans/fc2/W_ans'],
+        vars_dict['ans/fc2/b_ans'],
     ]
 
     reg_ans_params = [
@@ -57,6 +62,7 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
     
     obj_ans_params = [
         vars_dict['ans/fc1/W_obj'],
+        vars_dict['ans/fc1/W_explt']
     ]
     
     atr_ans_params = [
@@ -88,9 +94,9 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
     elif mode=='q_obj_atr_reg':
         vars_to_train += reg_ans_params
 
-    if not mode=='q':
-        vars_to_train = [var for var in vars_to_train if \
-                         'ans/word_embed/word_vecs' not in var.name]
+    # word_vecs are now shared with the answer embedding in
+    # ans_comp_margin_graph, so they stay trainable in every mode
 
     # Fine tune begining with a previous model
     if fine_tune==True:
@@ -148,9 +154,9 @@ def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
                                    img_height, img_width, 3)
             
         feed_dict = ans_io_helper.\
-                    feed_dict_creator(region_images, ans_labels, parsed_q, 
-                                      region_score, 1.0, plholder_dict, 
-                                      vocab).feed_dict
+                    AnsFeedDictCreator(region_images, ans_labels, parsed_q, 
+                                       region_score, 1.0, plholder_dict, 
+                                       vocab).feed_dict
 
         correct = correct + accuracy.eval(feed_dict)
 
@@ -179,6 +185,7 @@ def train(train_params):
     region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
     ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
     vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
+#    vocab = ans_io_helper.join_vocab(vocab, ans_vocab)
 
     # Save region crops
     if train_params['crop_n_save_regions'] == True:
@@ -209,39 +216,38 @@ def train(train_params):
     atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
     atr_feat = atr_feat_op.outputs[0]
 
-    # pred_rel_score = graph_creator.rel_comp_graph(image_regions, questions,
-    #                                               obj_feat, atr_feat, 
-    #                                               'q_obj_atr_reg', 1.0, 
-    #                                               len(vocab), batch_size)
-    
+    pred_rel_score = graph_creator.rel_comp_graph(plholder_dict,
+                                                  obj_feat, atr_feat,
+                                                  y_pred_obj, y_pred_atr, 
+                                                  'q_obj_atr_reg_explt',
+                                                  1.0, len(vocab), batch_size)
 
     # Restore rel, obj and attribute classifier parameters
-#    rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
+    rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
     obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
     atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
 
- #   rel_saver = tf.train.Saver(rel_vars)
+    rel_saver = tf.train.Saver(rel_vars)
     obj_atr_saver = tf.train.Saver(obj_vars+atr_vars)
 
-  #  rel_saver.restore(sess, rel_model)
+    rel_saver.restore(sess, rel_model)
     obj_atr_saver.restore(sess, obj_atr_model)
 
-    y_pred = graph_creator.ans_comp_graph(plholder_dict, 
-                                          obj_feat, atr_feat, vocab, 
-                                          inv_vocab, len(ans_vocab), 
-                                          train_params['mode'])
-#    pred_rel_score_vec = tf.reshape(pred_rel_score, 
-    #                                 [1, batch_size*ans_io_helper.num_proposals])
-    # y_avg = graph_creator.aggregate_y_pred(y_pred, 
-    #                                        pred_rel_score_vec, batch_size, 
-    #                                        ans_io_helper.num_proposals, 
-    #                                        len(ans_vocab))
+    y_pred = graph_creator.ans_comp_margin_graph(plholder_dict, 
+                                                 obj_feat, atr_feat, 
+                                                 y_pred_obj, y_pred_atr, 
+                                                 vocab, inv_vocab, ans_vocab, 
+                                                 train_params['mode'])
+    pred_rel_score_vec = tf.reshape(pred_rel_score, 
+                                    [1, batch_size*ans_io_helper.num_proposals])
+
     y_avg = graph_creator.aggregate_y_pred(y_pred, 
-                                           region_score, batch_size, 
+                                           pred_rel_score_vec, batch_size, 
                                            ans_io_helper.num_proposals, 
                                            len(ans_vocab))
     
-    cross_entropy = graph_creator.loss(y, y_avg)
+    margin_loss = graph_creator.margin_loss(y, y_avg, 0.2)
     accuracy = graph_creator.evaluation(y, y_avg)
     
     # Collect variables
@@ -249,7 +255,7 @@ def train(train_params):
     pretrained_vars, vars_to_train, vars_to_restore, vars_to_save, \
         vars_to_init, vars_dict = \
             get_process_flow_vars(train_params['mode'], 
-                                  obj_vars, atr_vars, [], #rel_vars,
+                                  obj_vars, atr_vars, rel_vars,
                                   train_params['fine_tune'])
 
     # Regularizers
@@ -265,9 +271,11 @@ def train(train_params):
         vars_dict['ans/fc1/W_obj'],
         vars_dict['ans/fc1/W_atr'],
         vars_dict['ans/fc1/W_q'],
+        vars_dict['ans/fc1/W_explt'],
     ]
 
-    ans_fc2_params = [vars_dict['ans/fc2/W']]
+    ans_fc2_params = [vars_dict['ans/fc2/W_feat'],
+                      vars_dict['ans/fc2/W_ans']]
 
     regularizer_ans_word_vecs = graph_creator \
         .regularize_params(ans_word_vec_params) 
@@ -277,7 +285,12 @@ def train(train_params):
     regularizer_ans_fcs = graph_creator \
         .regularize_params(ans_fc1_params + ans_fc2_params)
 
-    total_loss = cross_entropy + \
+    total_loss = margin_loss + \
                  1e-5 * regularizer_ans_word_vecs + \
                  1e-5 * regularizer_ans_fcs + \
                  1e-3 * regularizer_ans_filters
@@ -323,7 +336,7 @@ def train(train_params):
     # Initialize vars_to_init
     all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
     optimizer_vars = [var for var in all_vars if var not in \
-                      obj_vars + atr_vars + ans_vars] #rel_vars + ans_vars]
+                      obj_vars + atr_vars + rel_vars + ans_vars]
     
     print('Optimizer Variables: ')
     print([var.name for var in optimizer_vars])
@@ -334,7 +347,7 @@ def train(train_params):
     mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
                          'Obj_Classifier/mean_image.npy')
 
-    placeholders = [image_regions, questions, keep_prob, y, region_score]
 
     # Start Training
     max_epoch = train_params['max_epoch']
@@ -381,13 +394,13 @@ def train(train_params):
                                        75, 75, 3)
 
             feed_dict_train = ans_io_helper \
-                .feed_dict_creator(train_region_images, 
-                                   train_ans_labels, 
-                                   train_parsed_q,
-                                   train_region_score,
-                                   0.5, 
-                                   plholder_dict,
-                                   vocab).feed_dict
+                .AnsFeedDictCreator(train_region_images, 
+                                    train_ans_labels, 
+                                    train_parsed_q,
+                                    train_region_score,
+                                    0.5, 
+                                    plholder_dict,
+                                    vocab).feed_dict
             
 
             _, current_train_batch_acc, y_avg_eval, loss_eval = \
diff --git a/classifiers/inherit_example.py b/classifiers/inherit_example.py
new file mode 100644
index 0000000..095345a
--- /dev/null
+++ b/classifiers/inherit_example.py
@@ -0,0 +1,14 @@
+class BaseClass(object):
+    def __init__(self, a):
+        print a
+
+    def base_method(self):
+        print 'Yeah inheritance'
+
+class DerivedClass(BaseClass):
+    def __init__(self, a, b):
+        BaseClass.__init__(self, a)  # explicit call to the base initializer
+        print b
+        self.base_method()
+
+a = DerivedClass(1, 2)
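The same pattern with super() on new-style classes, for comparison (a minimal sketch, not part of the patch):

    class BaseClass(object):
        def __init__(self, a):
            print(a)

    class DerivedClass(BaseClass):
        def __init__(self, a, b):
            # cooperative equivalent of BaseClass.__init__(self, a)
            super(DerivedClass, self).__init__(a)
            print(b)

    a = DerivedClass(1, 2)

This mirrors how RelFeedDictCreator and AnsFeedDictCreator extend FeedDictCreator in ans_data_io_helper.py.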
diff --git a/classifiers/region_ranker/eval_rel_classifier.py b/classifiers/region_ranker/eval_rel_classifier.py
index 49046f5..0cc92cb 100644
--- a/classifiers/region_ranker/eval_rel_classifier.py
+++ b/classifiers/region_ranker/eval_rel_classifier.py
@@ -17,6 +17,7 @@ def eval(eval_params):
     sess = tf.InteractiveSession()
     train_anno_filename = eval_params['train_json']
     test_anno_filename = eval_params['test_json']
+    parsed_q_filename = eval_params['parsed_q_json']
     regions_anno_filename = eval_params['regions_json']
     image_regions_dir = eval_params['image_regions_dir']
     outdir = eval_params['outdir']
@@ -33,6 +34,7 @@ def eval(eval_params):
 
     qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
     qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
+    parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
     region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
     ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
     vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)
@@ -40,24 +42,24 @@ def eval(eval_params):
     
     # Create graph
     g = tf.get_default_graph()
-    image_regions, questions, y, keep_prob = \
+    plholder_dict = \
         graph_creator.placeholder_inputs_rel(ans_io_helper.num_proposals,
                                              len(vocab), mode='gt')
-    placeholders = [image_regions, questions, y, keep_prob]
+    image_regions = plholder_dict['image_regions']
+    y = plholder_dict['gt_scores']
+    keep_prob = plholder_dict['keep_prob']
+
     y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
     obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
     obj_feat = obj_feat_op.outputs[0]
     y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
     atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
     atr_feat = atr_feat_op.outputs[0]
-    y_pred = graph_creator.rel_comp_graph(image_regions, questions, 
+    y_pred = graph_creator.rel_comp_graph(plholder_dict,
+                                          obj_feat, atr_feat,
                                           y_pred_obj, y_pred_atr, mode,
                                           keep_prob, len(vocab), batch_size)
 
-    # y_pred = graph_creator.rel_comp_graph(image_regions, questions, 
-    #                                       obj_feat, atr_feat, mode,
-    #                                       keep_prob, len(vocab), batch_size)
-
     # Restore model
     restorer = tf.train.Saver()
     if os.path.exists(model):
@@ -76,11 +78,11 @@ def eval(eval_params):
 
     # Test Recall
     test_recall = rel_trainer.evaluate(y_pred, qa_anno_dict, 
-                                       region_anno_dict, ans_vocab, 
-                                       vocab, image_regions_dir, 
-                                       mean_image, test_start_id, 
-                                       test_set_size, batch_size,
-                                       placeholders, 75, 75,
-                                       test_batch_creator,verbose=True)
+                                       region_anno_dict, parsed_q_dict,
+                                       ans_vocab, vocab, 
+                                       image_regions_dir, mean_image, 
+                                       test_start_id, test_set_size, 
+                                       batch_size, plholder_dict,
+                                       75, 75, test_batch_creator,verbose=True)
 
     print('Test Rec: ' + str(test_recall))
diff --git a/classifiers/region_ranker/train_rel_classifier.py b/classifiers/region_ranker/train_rel_classifier.py
index de0f046..a7f5931 100644
--- a/classifiers/region_ranker/train_rel_classifier.py
+++ b/classifiers/region_ranker/train_rel_classifier.py
@@ -39,34 +39,37 @@ def batch_recall(pred_scores, gt_scores, k):
 
     return batch_recall
 
-def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
-             image_dir, mean_image, start_index, val_set_size, batch_size,
-             placeholders, img_height, img_width, batch_creator, verbose=False):
+def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict,
+             ans_vocab, vocab, image_dir, mean_image, start_index, val_set_size,
+             batch_size, plholder_dict, img_height, img_width, batch_creator, 
+             verbose=False):
     
     recall_at_k = 0
     max_iter = int(math.floor(val_set_size/batch_size))
     for i in xrange(max_iter):
         if verbose==True:
             print('Iter: ' + str(i+1) + '/' + str(max_iter))
-        region_images, ans_labels, questions, \
-        region_score_vec, partition= batch_creator \
+        region_images, ans_labels, parsed_q, \
+        region_scores_vec, partition = batch_creator \
             .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, 
                                    ans_vocab, vocab, image_dir, mean_image, 
                                    start_index+i*batch_size, batch_size, 
+                                   parsed_q_dict,
                                    img_height, img_width, 3)
-        region_score = batch_creator.reshape_score(region_score_vec)
+        region_scores = batch_creator.reshape_score(region_scores_vec)
 
-        feed_dict = {
-            placeholders[0] : region_images, 
-            placeholders[1] : questions,
-            placeholders[2] : region_score,
-            placeholders[3] : 1.0,
-        }
+        feed_dict = ans_io_helper \
+            .RelFeedDictCreator(region_images, 
+                                parsed_q,
+                                region_scores,
+                                1.0, 
+                                plholder_dict,
+                                vocab).feed_dict
 
         region_score_pred_eval = region_score_pred.eval(feed_dict)
     
         recall_at_k += batch_recall(region_score_pred_eval, 
-                                    region_score, -1)
+                                    region_scores, -1)
         
     recall_at_k /= max_iter
 
@@ -77,6 +80,7 @@ def train(train_params):
     sess = tf.InteractiveSession()
     train_anno_filename = train_params['train_json']
     test_anno_filename = train_params['test_json']
+    parsed_q_filename = train_params['parsed_q_json']
     regions_anno_filename = train_params['regions_json']
     image_dir = train_params['image_dir']
     image_regions_dir = train_params['image_regions_dir']
@@ -89,6 +93,7 @@ def train(train_params):
         os.mkdir(outdir)
 
     qa_anno_dict = ans_io_helper.parse_qa_anno(train_anno_filename)
+    parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
     region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
     ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
     vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
@@ -106,24 +111,24 @@ def train(train_params):
     
     # Create graph
     g = tf.get_default_graph()
-    image_regions, questions, y, keep_prob = \
+    plholder_dict = \
         graph_creator.placeholder_inputs_rel(ans_io_helper.num_proposals,
                                              len(vocab), mode='gt')
-    placeholders = [image_regions, questions, y, keep_prob]
+    image_regions = plholder_dict['image_regions']
+    y = plholder_dict['gt_scores']
+    keep_prob = plholder_dict['keep_prob']
+
     y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
     obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
     obj_feat = obj_feat_op.outputs[0]
     y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
     atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
     atr_feat = atr_feat_op.outputs[0]
-    y_pred = graph_creator.rel_comp_graph(image_regions, questions, 
+    y_pred = graph_creator.rel_comp_graph(plholder_dict,
+                                          obj_feat, atr_feat,
                                           y_pred_obj, y_pred_atr, mode,
                                           keep_prob, len(vocab), batch_size)
 
-    # y_pred = graph_creator.rel_comp_graph(image_regions, questions, 
-    #                                       obj_feat, atr_feat, mode,
-    #                                       keep_prob, len(vocab), batch_size)
-
     accuracy = graph_creator.evaluation(y, y_pred)
     
     cross_entropy = graph_creator.loss(y, y_pred)
@@ -139,6 +144,7 @@ def train(train_params):
         'rel/fc1/W_q',
         'rel/fc1/W_obj',
         'rel/fc1/W_atr',
+        'rel/fc1/W_explt',
         'rel/fc1/b',
         'rel/fc2/W',
         'rel/fc2/b',
@@ -161,6 +167,7 @@ def train(train_params):
         vars_dict['rel/fc1/W_q'],
         vars_dict['rel/fc1/W_obj'],
         vars_dict['rel/fc1/W_atr'],
+        vars_dict['rel/fc1/W_explt'],
         vars_dict['rel/fc2/W'],
     ]
 
@@ -244,12 +251,12 @@ def train(train_params):
 
     # Check accuracy of restored model
     if train_params['fine_tune']==True:
-        restored_recall = evaluate(y_pred, qa_anno_dict, 
-                                   region_anno_dict, ans_vocab, 
+        restored_recall = evaluate(y_pred, qa_anno_dict, region_anno_dict, 
+                                   parsed_q_dict, ans_vocab, 
                                    vocab, image_regions_dir, 
                                    mean_image, val_start_id, 
                                    val_set_size, batch_size,
-                                   placeholders, 75, 75,
+                                   plholder_dict, 75, 75,
                                    val_batch_creator)
         print('Recall of restored model: ' + str(restored_recall))
     
@@ -261,23 +268,26 @@ def train(train_params):
         train_batch_creator.shuffle_ids()
         for i in range(max_iter):
         
-            train_region_images, train_ans_labels, train_questions, \
+            train_region_images, train_ans_labels, train_parsed_q, \
             train_region_score_vec, train_partition= train_batch_creator \
                 .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, 
                                        ans_vocab, vocab, 
                                        image_regions_dir, mean_image, 
-                                       1+i*batch_size, batch_size, 
+                                       1+i*batch_size, batch_size,
+                                       parsed_q_dict,
                                        75, 75, 3)
+
             train_region_score = train_batch_creator \
                 .reshape_score(train_region_score_vec)
 
-            feed_dict_train = {
-                image_regions : train_region_images, 
-                questions: train_questions,
-                keep_prob: 0.5,
-                y: train_region_score,
-            }
-            
+            feed_dict_train = ans_io_helper \
+                .RelFeedDictCreator(train_region_images, 
+                                    train_parsed_q,
+                                    train_region_score,
+                                    0.5, 
+                                    plholder_dict,
+                                    vocab).feed_dict
+
             _, current_train_batch_acc, y_pred_eval, loss_eval = \
                     sess.run([train_step, accuracy, y_pred, total_loss], 
                              feed_dict=feed_dict_train)
@@ -289,23 +299,23 @@ def train(train_params):
                                                         train_region_score, -1)
         
             if (i+1)%500==0:
-                val_recall = evaluate(y_pred, qa_anno_dict, 
-                                      region_anno_dict, ans_vocab, vocab,
+                val_recall = evaluate(y_pred, qa_anno_dict, region_anno_dict, 
+                                      parsed_q_dict, ans_vocab, vocab,
                                       image_regions_dir, mean_image, 
                                       val_start_id, val_set_size_small,
-                                      batch_size, placeholders, 75, 75,
+                                      batch_size, plholder_dict, 75, 75,
                                       val_small_batch_creator)
                 
                 print('Iter: ' + str(i+1) + ' Val Sm Rec: ' + str(val_recall))
 
         train_rec_array_epoch[epoch] = train_rec_array_epoch[epoch] / max_iter
         val_rec_array_epoch[epoch] = evaluate(y_pred, qa_anno_dict, 
-                                              region_anno_dict, ans_vocab, 
-                                              vocab, image_regions_dir, 
-                                              mean_image, val_start_id, 
-                                              val_set_size, batch_size,
-                                              placeholders, 75, 75,
-                                              val_batch_creator)
+                                              region_anno_dict, parsed_q_dict, 
+                                              ans_vocab, vocab, 
+                                              image_regions_dir, mean_image, 
+                                              val_start_id, val_set_size, 
+                                              batch_size, plholder_dict, 
+                                              75, 75, val_batch_creator)
 
         print('Val Rec: ' + str(val_rec_array_epoch[epoch]) + 
               ' Train Rec: ' + str(train_rec_array_epoch[epoch]))
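batch_recall's body sits outside these hunks; purely for orientation, one common recall@k for region ranking, with k=-1 read as "k = number of relevant regions" (an assumption about its semantics, not the repo's implementation):

    import numpy as np

    def recall_at_k(pred_scores, gt_scores, k=-1):
        # pred_scores, gt_scores: [batch, num_proposals]
        recalls = []
        for pred, gt in zip(pred_scores, gt_scores):
            num_pos = int((gt > 0).sum())
            kk = num_pos if k == -1 else k
            top_k = np.argsort(-pred)[:kk]        # indices of the k highest scores
            hits = int((gt[top_k] > 0).sum())     # relevant regions retrieved
            recalls.append(float(hits) / max(num_pos, 1))
        return sum(recalls) / len(recalls)

    pred = np.array([[0.9, 0.1, 0.8, 0.2]])
    gt = np.array([[1.0, 0.0, 1.0, 0.0]])
    print(recall_at_k(pred, gt))  # 1.0: both relevant regions rank in the top 2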
diff --git a/classifiers/tf_graph_creation_helper.py b/classifiers/tf_graph_creation_helper.py
index a60f2e2..0a8c93f 100644
--- a/classifiers/tf_graph_creation_helper.py
+++ b/classifiers/tf_graph_creation_helper.py
@@ -70,17 +70,33 @@ def placeholder_inputs(mode = 'gt'):
     
 
 def placeholder_inputs_rel(num_proposals, total_vocab_size, mode = 'gt'):
-    image_regions = tf.placeholder(tf.float32, shape=[None,25,25,3])
-    keep_prob = tf.placeholder(tf.float32)
-    questions = tf.placeholder(tf.float32, shape=[None,total_vocab_size])
+    plholder_dict = {
+        'image_regions': tf.placeholder(tf.float32, [None,25,25,3], 
+                                        'image_regions'),
+        'keep_prob': tf.placeholder(tf.float32, name='keep_prob'),
+    }
+    for i in xrange(4):
+        bin_name = 'bin' + str(i)
+        plholder_dict[bin_name + '_shape'] = \
+            tf.placeholder(tf.int64, [2], bin_name + '_shape')
+        plholder_dict[bin_name + '_indices'] = \
+            tf.placeholder(tf.int64, [None, 2], bin_name + '_indices')
+        plholder_dict[bin_name + '_values'] = \
+            tf.placeholder(tf.int64, [None], bin_name + '_values')
+        plholder_dict[bin_name + '_obj_cont'] = \
+            tf.placeholder(tf.float32, [None, graph_config['num_objects']],
+                           bin_name + '_obj_cont')
+        plholder_dict[bin_name + '_atr_cont'] = \
+            tf.placeholder(tf.float32, [None, graph_config['num_attributes']],
+                           bin_name + '_atr_cont')
     if mode == 'gt':
         print 'Creating placeholder for ground truth'
-        y = tf.placeholder(tf.float32, 
-                           shape=[None, ans_io_helper.num_proposals])
-        return (image_regions, questions, y, keep_prob)
+        plholder_dict['gt_scores'] = tf.placeholder(
+            tf.float32, shape=[None, ans_io_helper.num_proposals],
+            name='gt_scores')
+        return plholder_dict
     if mode == 'no_gt':
         print 'No placeholder for ground truth'
-        return (image_regions, questions, keep_prob)
+        return plholder_dict
 
 
 def placeholder_inputs_ans(total_vocab_size, ans_vocab_size, mode='gt'):
@@ -204,16 +220,58 @@ def atr_comp_graph(x, keep_prob, obj_feat):
 
     return y_pred
 
-def rel_comp_graph(image_regions, questions, obj_feat, atr_feat, 
-                   mode, keep_prob, vocab_size, batch_size):
 
+def q_bin_embed_graph(bin_name, word_vecs, plholder_dict):
+    indices = plholder_dict[bin_name + '_indices']
+    values = plholder_dict[bin_name + '_values']
+    shape = plholder_dict[bin_name + '_shape']
+    sp_ids = tf.SparseTensor(indices, values, shape)
+    return tf.nn.embedding_lookup_sparse(word_vecs, sp_ids, None, 
+                                         name=bin_name + '_embedding')
+
+
+def explicit_feat_graph(bin_name, classifier_prob, 
+                        classifier_type, plholder_dict):
+    cont_plholder_name = bin_name + '_' + classifier_type + '_cont'
+    feat_name = 'explt_' + bin_name + '_' + classifier_type
+    dot_product = tf.mul(classifier_prob, plholder_dict[cont_plholder_name])
+    return tf.reduce_mean(dot_product, 1, keep_dims=True, name=feat_name)
+
+
+def rel_comp_graph(plholder_dict, obj_feat, atr_feat,
+                   obj_prob, atr_prob, mode, keep_prob, 
+                   vocab_size, batch_size):
+    image_regions = plholder_dict['image_regions']
     with tf.name_scope('rel') as rel_graph:
 
         with tf.name_scope('word_embed') as q_embed:
             word_vecs = weight_variable([vocab_size,
                                          graph_config['word_vec_dim']],
                                         var_name='word_vecs')
-            q_feat = tf.matmul(questions, word_vecs, name='q_feat')
+            bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict)
+            bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict)
+            bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict)
+            bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict)
+            q_feat = tf.concat(1, [bin0_embed,
+                                   bin1_embed,
+                                   bin2_embed,
+                                   bin3_embed], name='q_feat')
+            
+        with tf.name_scope('explicit_feat') as expl_feat:
+            explt_feat_list = []
+            for bin_num in xrange(4):
+                bin_name = 'bin'+ str(bin_num)
+                explt_feat_list.append(explicit_feat_graph(bin_name, obj_prob, 
+                                        'obj', plholder_dict))
+                explt_feat_list.append(explicit_feat_graph(bin_name, atr_prob, 
+                                        'atr', plholder_dict))
+
+            concat_explt_feat = tf.concat(1, explt_feat_list,
+                                          name='concat_explt_feat')
+
+            concat_explt_feat_dim = concat_explt_feat.get_shape()[1].value
+            print('Concatenated explicit feature dimension: ' +
+                  str(concat_explt_feat_dim))
 
         with tf.name_scope('conv1') as conv1:
             W_conv1 = weight_variable([5,5,3,4])
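In NumPy terms, q_bin_embed_graph above reduces each bin to the mean of its tokens' word vectors (assuming embedding_lookup_sparse's default 'mean' combiner, since sp_weights is None), and q_feat concatenates the four bin embeddings; a sketch with made-up sizes:

    import numpy as np

    word_vec_dim = 50                               # stand-in for graph_config['word_vec_dim']
    word_vecs = np.random.randn(100, word_vec_dim)  # stand-in embedding table

    def bin_embed(token_ids_per_question):
        # one row per question: mean of the word vectors of the bin's tokens
        return np.stack([word_vecs[ids].mean(axis=0)
                         for ids in token_ids_per_question])

    bins = [bin_embed([[3, 7], [5]]) for _ in range(4)]  # bins 0..3, two questions
    q_feat = np.concatenate(bins, axis=1)
    print(q_feat.shape)  # (2, 200): graph_config['q_embed_dim'] = 4 * word_vec_dim here
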
@@ -248,47 +306,57 @@ def rel_comp_graph(image_regions, questions, obj_feat, atr_feat,
             print 'Atr feat dim: {}'.format(atr_feat_dim)
             W_reg_fc1 = weight_variable([reg_feat_dim, fc1_dim], 
                                         var_name='W_reg')
-            W_q_fc1 = weight_variable([graph_config['word_vec_dim'], 
+            W_q_fc1 = weight_variable([graph_config['q_embed_dim'], 
                                        fc1_dim], var_name='W_q')
             W_obj_fc1 = weight_variable([obj_feat_dim, 
                                          fc1_dim], var_name='W_obj')
             W_atr_fc1 = weight_variable([atr_feat_dim, 
                                          fc1_dim], var_name='W_atr')
+            W_explt_fc1 = weight_variable([concat_explt_feat_dim,
+                                           fc1_dim], var_name='W_explt')
             b_fc1 = bias_variable([fc1_dim])
             
             a_reg_fc1 = tf.matmul(reg_feat, W_reg_fc1, name='a_reg_fc1')
             a_q_fc1 = tf.matmul(q_feat, W_q_fc1, name='a_q_fc1')
             a_obj_fc1 = tf.matmul(obj_feat, W_obj_fc1, name='a_obj_fc1')
             a_atr_fc1 = tf.matmul(atr_feat, W_atr_fc1, name='a_atr_fc1')
-
+            a_explt_fc1 = tf.matmul(concat_explt_feat, W_explt_fc1,
+                                    name='a_explt_fc1')
             coeff = {
                 'reg': 0.0,
                 'q': 0.0,
                 'obj': 0.0,
                 'atr': 0.0,
+                'explt': 0.0,
             }
             
-            if mode=='q_reg':
+            if mode=='q_reg_explt':
                 print mode
-                coeff['reg'] = 1/2.0
-                coeff['q'] = 1/2.0
+                coeff['reg'] = 1/3.0
+                coeff['q'] = 1/3.0
+                coeff['explt'] = 1/3.0
 
-            elif mode=='q_obj_atr':
+            elif mode=='q_obj_atr_explt':
                 print mode
-                coeff['q'] = 1/3.0
-                coeff['obj'] = 1/3.0
-                coeff['atr'] = 1/3.0
+                coeff['q'] = 0.1
+                coeff['obj'] = 0.1
+                coeff['atr'] = 0.1
+                coeff['explt'] = 0.7
 
-            elif mode=='q_obj_atr_reg':
+            elif mode=='q_obj_atr_reg_explt':
                 print mode
-                coeff['q'] = 1/4.0
-                coeff['obj'] = 1/4.0
-                coeff['atr'] = 1/4.0
-                coeff['reg'] = 1/4.0
+                coeff['q'] = 0.05
+                coeff['obj'] = 0.05
+                coeff['atr'] = 0.05
+                coeff['reg'] = 0.05
+                coeff['explt'] = 0.8
+
+            elif mode=='explt':
+                coeff['explt'] = 1.0
+
+            else:
+                # fail loudly instead of silently zeroing every branch
+                raise ValueError('Unknown rel_comp_graph mode: ' + mode)
+
             a_fc1 = coeff['reg']*a_reg_fc1 + coeff['q']*a_q_fc1 + \
                     coeff['obj']*a_obj_fc1 + coeff['atr']*a_atr_fc1 + \
-                    b_fc1
+                    coeff['explt']*a_explt_fc1 + b_fc1
 
             h_fc1 = tf.nn.relu(a_fc1, name='h')
             h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
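explicit_feat_graph, which feeds W_explt above, boils down to masking the classifier's probabilities with the 0/1 containment indicators (does a label's word occur in the bin?) and mean-reducing to one scalar per (bin, classifier) pair; a toy NumPy mirror:

    import numpy as np

    obj_prob = np.array([[0.7, 0.2, 0.1]])       # [num_regions, num_objects], toy values
    bin0_obj_cont = np.array([[1.0, 0.0, 0.0]])  # first object's name occurs in bin0
    feat = (obj_prob * bin0_obj_cont).mean(axis=1, keepdims=True)
    print(feat)  # [[0.2333...]]; 8 such columns (4 bins x {obj, atr}) form concat_explt_feat
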
@@ -306,16 +374,7 @@ def rel_comp_graph(image_regions, questions, obj_feat, atr_feat,
         y_pred = tf.nn.softmax(logits, name='softmax')
 
     return y_pred
-
-
-def q_bin_embed_graph(bin_name, word_vecs, plholder_dict):
-    indices = plholder_dict[bin_name + '_indices']
-    values = plholder_dict[bin_name + '_values']
-    shape = plholder_dict[bin_name + '_shape']
-    sp_ids = tf.SparseTensor(indices, values, shape)
-    return tf.nn.embedding_lookup_sparse(word_vecs, sp_ids, None, 
-                                         name=bin_name + '_embedding')
-    
+
 
 def ans_comp_graph(plholder_dict, obj_feat, atr_feat, 
                    vocab, inv_vocab, ans_vocab_size, mode):
@@ -426,6 +485,160 @@ def ans_comp_graph(plholder_dict, obj_feat, atr_feat,
         return y_pred
 
 
+def ans_comp_margin_graph(plholder_dict, obj_feat, atr_feat, obj_prob, atr_prob,
+                          vocab, inv_vocab, ans_vocab, mode):
+    vocab_size = len(vocab)
+    image_regions = plholder_dict['image_regions']
+    keep_prob = plholder_dict['keep_prob']
+    ans_vocab_size = len(ans_vocab)
+    
+    inv_ans_vocab = {v:k for k, v in ans_vocab.items()}
+    ans_in_vocab_ids_list = []
+    for i in xrange(ans_vocab_size):
+        # requires every answer word to be present in the question vocab
+        # (cf. the join_vocab helper in ans_data_io_helper)
+        ans_in_vocab_ids_list.append(vocab[inv_ans_vocab[i]])
+
+    ans_in_vocab_ids_tensor = tf.constant(ans_in_vocab_ids_list, dtype=tf.int64)
+
+    with tf.name_scope('ans') as ans_graph:
+
+        with tf.name_scope('word_embed') as word_embed:
+
+            word_vecs = weight_variable([vocab_size,
+                                         graph_config['word_vec_dim']],
+                                        var_name='word_vecs')
+            
+            bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict)
+            bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict)
+            bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict)
+            bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict)
+            q_feat = tf.concat(1, [bin0_embed,
+                                   bin1_embed,
+                                   bin2_embed,
+                                   bin3_embed], name='q_feat')
+            
+            ans_embed = tf.nn.embedding_lookup(word_vecs,
+                                               ans_in_vocab_ids_tensor,
+                                               name='ans_embed')
+
+        with tf.name_scope('explicit_feat') as expl_feat:
+            explt_feat_list = []
+            for bin_num in xrange(4):
+                bin_name = 'bin'+ str(bin_num)
+                explt_feat_list.append(explicit_feat_graph(bin_name, obj_prob, 
+                                        'obj', plholder_dict))
+                explt_feat_list.append(explicit_feat_graph(bin_name, atr_prob, 
+                                        'atr', plholder_dict))
+
+            concat_explt_feat = tf.concat(1, explt_feat_list,
+                                          name='concat_explt_feat')
+
+            concat_explt_feat_dim = concat_explt_feat.get_shape()[1].value
+            print('Concatenated explicit feature dimension: ' +
+                  str(concat_explt_feat_dim))
+
+        with tf.name_scope('conv1') as conv1:
+            num_filters_conv1 = 4
+            W_conv1 = weight_variable([5,5,3,num_filters_conv1])
+            b_conv1 = bias_variable([num_filters_conv1])
+            a_conv1 = tf.add(conv2d(image_regions, W_conv1), b_conv1, name='a')
+            h_conv1 = tf.nn.relu(a_conv1, name='h')
+            h_pool1 = max_pool_2x2(h_conv1)
+            h_conv1_drop = tf.nn.dropout(h_pool1, keep_prob, name='h_pool_drop')
+
+        with tf.name_scope('conv2') as conv2:
+            num_filters_conv2 = 8
+            W_conv2 = weight_variable([3,3,num_filters_conv1,num_filters_conv2])
+            b_conv2 = bias_variable([num_filters_conv2])
+            # use the dropout-regularized conv1 output
+            a_conv2 = tf.add(conv2d(h_conv1_drop, W_conv2), b_conv2, name='a')
+            h_conv2 = tf.nn.relu(a_conv2, name='h')
+            h_pool2 = max_pool_2x2(h_conv2)
+            h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob, name='h_pool_drop')
+            h_pool2_drop_shape = h_pool2_drop.get_shape()
+            region_feat_dim = reduce(lambda f, g: f*g, 
+                                  [dim.value for dim in h_pool2_drop_shape[1:]])
+            region_feat = tf.reshape(h_pool2_drop, [-1, region_feat_dim], 
+                                     name='region_feat')
+
+            print('Region feature dimension: ' + str(region_feat_dim)) #392
+       
+        with tf.name_scope('fc1') as fc1:
+
+            fc1_dim = graph_config['ans_fc1_dim']
+            W_region_fc1 = weight_variable([region_feat_dim, 
+                                            fc1_dim], var_name='W_region')
+            W_obj_fc1 = weight_variable([graph_config['obj_feat_dim'], 
+                                         fc1_dim], var_name='W_obj')
+            W_atr_fc1 = weight_variable([graph_config['atr_feat_dim'], 
+                                         fc1_dim], var_name='W_atr')
+            W_q_fc1 = weight_variable([graph_config['q_embed_dim'], 
+                                       fc1_dim], var_name='W_q')
+            W_explt_fc1 = weight_variable([concat_explt_feat_dim,
+                                           fc1_dim], var_name='W_explt')
+            b_fc1 = bias_variable([fc1_dim])
+
+            a_fc1_region = tf.matmul(region_feat, W_region_fc1, 
+                                     name='a_fc1_region')
+            a_fc1_obj = tf.matmul(obj_feat, W_obj_fc1, name='a_fc1_obj') 
+            a_fc1_atr = tf.matmul(atr_feat, W_atr_fc1, name='a_fc1_atr')
+            a_fc1_q = tf.matmul(q_feat, W_q_fc1, name='a_fc1_q')
+            a_explt_fc1 = tf.matmul(concat_explt_feat, W_explt_fc1,
+                                    name='a_explt_fc1')        
+            coeff_reg = 0.0
+            coeff_obj = 0.0
+            coeff_atr = 0.0
+            coeff_q = 0.0
+            coeff_explt = 0.0
+
+            if mode=='q':
+                coeff_q = 1.0
+
+            elif mode=='q_reg':
+                coeff_q = 1/2.0
+                coeff_reg = 1/2.0
+
+            elif mode=='q_obj_atr':
+                coeff_q = 1/4.0
+                coeff_obj = 1/4.0
+                coeff_atr = 1/4.0
+                coeff_explt = 1/4.0
+
+            elif mode=='q_obj_atr_reg':
+                coeff_q = 1/5.0
+                coeff_obj = 1/5.0
+                coeff_atr = 1/5.0
+                coeff_reg = 1/5.0
+                coeff_explt = 1/5.0
+
+            a_fc1 = coeff_reg * a_fc1_region + \
+                    coeff_obj * a_fc1_obj + \
+                    coeff_atr * a_fc1_atr + \
+                    coeff_q * a_fc1_q + \
+                    coeff_explt * a_explt_fc1
+            
+            h_fc1 = tf.nn.relu(a_fc1, name='h')
+            h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
+
+        with tf.name_scope('fc2') as fc2:
+            W_feat_fc2 = weight_variable([fc1_dim, 
+                                          graph_config['word_vec_dim']],
+                                         var_name='W_feat')
+            b_feat_fc2 = bias_variable([graph_config['word_vec_dim']],
+                                       var_name='b_feat')
+            W_ans_fc2 = weight_variable([graph_config['word_vec_dim'], 
+                                         graph_config['word_vec_dim']],
+                                        var_name='W_ans')      
+            b_ans_fc2 = bias_variable([graph_config['word_vec_dim']],
+                                      var_name='b_ans')
+            comb_feat_embed = tf.add(tf.matmul(h_fc1_drop, W_feat_fc2), 
+                                     b_feat_fc2, 
+                                     name='comb_feat_embed')
+            comb_ans_embed = tf.add(tf.matmul(ans_embed, W_ans_fc2), 
+                                     b_ans_fc2, 
+                                     name='comb_ans_embed')
+        ans_scores = tf.matmul(comb_feat_embed, tf.transpose(comb_ans_embed), 
+                               name='ans_scores')
+        # fix the row norm of the scores at 3.0 (temperature-like scaling)
+        # before the softmax
+        ans_scores = tf.nn.l2_normalize(ans_scores, 1)*3.0
+        return tf.nn.softmax(ans_scores)
+
+
 def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals, 
                      ans_vocab_size):
     y_pred_list = tf.split(0, batch_size, y_pred)
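The scoring stage of ans_comp_margin_graph in NumPy terms: the fused image/question features and the answer words' (shared) word vectors are projected into a common space and every answer is scored by a dot product; a sketch with made-up shapes:

    import numpy as np

    batch, fc1_dim, d, num_ans = 4, 300, 50, 10        # d stands in for word_vec_dim
    h_fc1_drop = np.random.randn(batch, fc1_dim)
    ans_embed = np.random.randn(num_ans, d)            # rows of word_vecs for answer words
    W_feat, b_feat = np.random.randn(fc1_dim, d), np.zeros(d)
    W_ans, b_ans = np.random.randn(d, d), np.zeros(d)

    comb_feat_embed = h_fc1_drop.dot(W_feat) + b_feat  # [batch, d]
    comb_ans_embed = ans_embed.dot(W_ans) + b_ans      # [num_ans, d]
    ans_scores = comb_feat_embed.dot(comb_ans_embed.T) # [batch, num_ans]
    print(ans_scores.shape)  # (4, 10); then row-normalized, scaled by 3.0, softmaxed
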
@@ -453,6 +666,12 @@ def loss(y, y_pred):
     return tf.truediv(cross_entropy, tf.cast(batch_size[0],tf.float32))
 
 
+def margin_loss(y, y_pred, margin):
+    correct_score = tf.reduce_sum(tf.mul(y, y_pred), 1, 
+                                  keep_dims=True, name='correct_score')
+    # hinge the incorrect answers against the correct one:
+    # max(0, s_wrong + margin - s_correct), averaged over the batch
+    hinge = tf.mul(1.0 - y, tf.maximum(0.0, y_pred + margin - correct_score))
+    return tf.reduce_mean(hinge)
+
+
 def regularize_params(param_list):
     regularizer = tf.zeros(shape=[])
     for param in param_list:
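A quick NumPy check of margin_loss above: the loss is zero once every wrong answer trails the correct one by at least the margin (a sketch mirroring the graph op):

    import numpy as np

    def margin_loss_np(y, y_pred, margin):
        # mirror of graph_creator.margin_loss
        correct = (y * y_pred).sum(axis=1, keepdims=True)
        return ((1.0 - y) * np.maximum(0.0, y_pred + margin - correct)).mean()

    y = np.array([[0.0, 1.0, 0.0]])
    print(margin_loss_np(y, np.array([[0.15, 0.70, 0.15]]), 0.2))  # 0.0, margin satisfied
    print(margin_loss_np(y, np.array([[0.45, 0.50, 0.05]]), 0.2))  # 0.05: 0.45 + 0.2 > 0.50
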
diff --git a/classifiers/train_classifiers.py b/classifiers/train_classifiers.py
index 015c6eb..b8972de 100644
--- a/classifiers/train_classifiers.py
+++ b/classifiers/train_classifiers.py
@@ -65,16 +65,17 @@ rel_classifier_train_params = {
     'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
     'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
     'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
+    'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
     'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
     'image_regions_dir': '/mnt/ramdisk/image_regions',
-    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob',
+    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
     'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
-    'mode': 'q_obj_atr',
-    'adam_lr' : 0.001,
+    'mode': 'q_obj_atr_reg_explt',
+    'adam_lr' : 0.0001,
     'crop_n_save_regions': False,
-    'max_epoch': 5,
+    'max_epoch': 10,
     'batch_size': 10,
-    'fine_tune': False,
+    'fine_tune': True,
     'start_model': 4, # Used only if fine_tune is True
 }
 
@@ -82,12 +83,13 @@ rel_classifier_eval_params = {
     'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
     'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
     'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
+    'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
     'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
     'image_regions_dir': '/mnt/ramdisk/image_regions',
-    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob',
-    'model_basedir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob',
-    'model_number': 4,
-    'mode': 'q_obj_atr',
+    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
+    'model_basedir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
+    'model_number': 9,
+    'mode': 'q_obj_atr_reg_explt',
     'batch_size': 20,
     'test_start_id': 94645,
     'test_set_size': 143495-94645+1,
@@ -100,15 +102,15 @@ ans_classifier_train_params = {
     'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
     'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
     'image_regions_dir': '/mnt/ramdisk/image_regions',
-    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_w_Rel',
-    'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier/rel_classifier_q_obj_atr-4',
+    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin',
+    'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9',
     'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
-    'adam_lr' : 0.001,
-    'mode' : 'q',
+    'adam_lr' : 0.0001,
+    'mode' : 'q_obj_atr',
     'crop_n_save_regions': False,
-    'max_epoch': 5,
+    'max_epoch': 10,
     'batch_size': 10,
-    'fine_tune': False,
+    'fine_tune': True,
     'start_model': 4, # When fine_tune is false used to pre-initialize q_obj_atr with q model etc
 }
 
-- 
GitLab