diff --git a/classifiers/ans_graph_creator.py b/classifiers/ans_graph_creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..91fe8e5aa60c5087191be0e82b61b8db99ef2d20
--- /dev/null
+++ b/classifiers/ans_graph_creator.py
@@ -0,0 +1,189 @@
+import numpy as np
+import math
+import pdb
+import tensorflow as tf
+import answer_classifier.ans_data_io_helper as ans_io_helper
+from tf_graph_creation_helper import weight_variable, bias_variable, \
+    q_bin_embed_graph, conv2d, max_pool_2x2, batchnorm
+
+
+class ans_graph_creator():
+    def __init__(self, 
+                 plholder_dict,
+                 obj_feat, 
+                 atr_feat, 
+                 obj_prob, 
+                 atr_prob,
+                 vocab, 
+                 inv_vocab, 
+                 ans_vocab,
+                 batch_size,
+                 graph_config,
+                 mode='q_obj_atr', 
+                 is_train=True):
+        
+        self.mode = mode
+        self.is_train = plholder_dict['is_train']
+        self.keep_prob = plholder_dict['keep_prob']
+        image_regions = plholder_dict['image_regions']
+        vocab_size = len(vocab)
+
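+        # Build the answer graph: embed the question, image regions, object
+        # and attribute features, project each into a shared space, average
+        # them per mixing_coeffs, and score answers by cosine similarity
+        # against projected answer-word embeddings. Note that the is_train
+        # argument is unused; batch norm reads the 'is_train' placeholder.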
+        with tf.name_scope('ans') as ans_graph:
+            # Word Vectors
+            word_vecs = self.create_word_vecs(vocab_size, 
+                                              graph_config['word_vec_dim'])
+
+            # Feature Computations
+            q_feat = self.add_q_feat_comp_layer(word_vecs, plholder_dict)
+            reg_feat = self.add_reg_feat_comp_layer(image_regions)
+
+            # Feature Projections (with batch norm)
+            feat_proj_dim = graph_config['joint_embed_dim']
+            proj_feat = dict()
+
+            proj_feat['q'] = self.fc_layer(q_feat, feat_proj_dim,
+                                           'q_feat_proj_layer')
+        
+            proj_feat['reg'] = self.fc_layer(reg_feat, feat_proj_dim,
+                                             'reg_feat_proj_layer')
+
+            proj_feat['obj'] = self.fc_layer(obj_feat, feat_proj_dim,
+                                             'obj_feat_proj_layer')
+            
+            proj_feat['atr'] = self.fc_layer(atr_feat, feat_proj_dim,
+                                             'atr_feat_proj_layer') 
+        
+            # Feature Combination
+            coeffs = self.mixing_coeffs()
+            print 'mixing coeffs:', coeffs
+            num_regions = batch_size*ans_io_helper.num_proposals
+            comb_feat = tf.zeros(shape=[num_regions, feat_proj_dim], 
+                                 dtype=tf.float32)
+            for feat_type, feat in proj_feat.items():
+                comb_feat = comb_feat + feat * coeffs[feat_type]
+                
+            # Answer feature
+            ans_feat = self.compute_ans_feat(word_vecs, vocab, ans_vocab)
+
+            # Project answer embeddings
+            proj_ans_feat = self.fc_layer(ans_feat, feat_proj_dim,
+                                          'ans_feat_proj_layer')
+
+            # Compute Cosine Distance
+            self.cosine_dist = self.compute_cosine_dist(comb_feat, 
+                                                        proj_ans_feat)
+                
+    def create_word_vecs(self, vocab_size, word_vec_dim):
+        word_vecs = weight_variable([vocab_size,
+                                     word_vec_dim],
+                                    var_name='word_vecs')
+        word_vecs = tf.nn.l2_normalize(word_vecs, 1)
+        tf.add_to_collection('regularize',word_vecs)
+        return word_vecs
+        
+    def add_q_feat_comp_layer(self, word_vecs, plholder_dict):
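+        # Concatenate the embeddings of the four parsed question bins into a
+        # single question feature vector.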
+        with tf.name_scope('q_feat_comp_layer') as q_feat_comp_layer:
+            bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict)
+            bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict)
+            bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict)
+            bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict)
+            q_feat = tf.concat(1, 
+                               [bin0_embed, bin1_embed, bin2_embed, bin3_embed],
+                               name='q_feat')
+        return q_feat
+
+    def add_reg_feat_comp_layer(self, image_regions):
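+        # Two conv -> ReLU -> pool -> dropout blocks; the flattened conv2
+        # activations serve as the per-region visual feature.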
+        with tf.name_scope('reg_feat_comp_layer') as reg_feat_comp_layer:
+            with tf.name_scope('conv1') as conv1:
+                W_conv1 = weight_variable([5,5,3,4])
+                b_conv1 = bias_variable([4])
+                a_conv1 = tf.add(conv2d(image_regions, W_conv1), 
+                                 b_conv1, name='a')
+                h_conv1 = tf.nn.relu(a_conv1, name='h')
+                h_pool1 = max_pool_2x2(h_conv1)
+                h_conv1_drop = tf.nn.dropout(h_pool1, self.keep_prob, 
+                                             name='h_pool_drop')
+
+            with tf.name_scope('conv2') as conv2:
+                W_conv2 = weight_variable([3,3,4,8])
+                b_conv2 = bias_variable([8])
+                a_conv2 = tf.add(conv2d(h_conv1_drop, W_conv2), b_conv2, 
+                                 name='a')
+                h_conv2 = tf.nn.relu(a_conv2, name='h')
+                h_pool2 = max_pool_2x2(h_conv2)
+                h_pool2_drop = tf.nn.dropout(h_pool2, self.keep_prob, 
+                                             name='h_pool_drop')
+                h_pool2_drop_shape = h_pool2_drop.get_shape()
+                reg_feat_dim = reduce(lambda f, g: f*g, 
+                                      [dim.value for dim in 
+                                       h_pool2_drop_shape[1:]])
+                reg_feat = tf.reshape(h_pool2_drop, [-1, reg_feat_dim], 
+                                      name='reg_feat')
+
+            tf.add_to_collection('regularize', W_conv1)
+            tf.add_to_collection('regularize', W_conv2)
+        
+        return reg_feat
+
+    def fc_layer(self, feat, proj_dim, name_scope):
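+        # Two-layer projection: affine -> batch norm -> ReLU -> affine.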
+        with tf.name_scope(name_scope) as fc_layer:
+            feat_dim = feat.get_shape()[1].value
+            W1 = weight_variable([feat_dim, proj_dim])
+            b1 = bias_variable([proj_dim])
+            proj_feat = tf.add(tf.matmul(feat, W1), b1)
+            bn_proj_feat = batchnorm(proj_feat, None, self.is_train)
+            W2 = weight_variable([proj_dim, proj_dim])
+            b2 = bias_variable([proj_dim])
+            bn_proj_feat = tf.add(tf.matmul(tf.nn.relu(bn_proj_feat), W2), b2)
+        tf.add_to_collection('regularize', W1)
+        tf.add_to_collection('regularize', W2)
+
+        return bn_proj_feat
+        
+    def mixing_coeffs(self):
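+        # Uniformly weight the feature types named in self.mode, e.g. mode
+        # 'q_obj_atr' gives q, obj and atr coefficients of 1/3 and reg 0.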
+        feat_types = ['q', 'obj', 'atr', 'reg']
+        coeffs = dict()
+        count = 0
+        for feat_type in feat_types:
+            if feat_type in self.mode:
+                coeffs[feat_type] = 1.0
+                count += 1
+            else:
+                coeffs[feat_type] = 0.0
+        coeffs = {k: v/count for k, v in coeffs.items()}
+        return coeffs
+          
+    def compute_ans_feat(self, word_vecs, vocab, ans_vocab):
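+        # Row i of the result is the word vector (in the question vocabulary)
+        # of the answer with answer-vocab id i.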
+        ans_vocab_size = len(ans_vocab)
+        inv_ans_vocab = {v:k for k, v in ans_vocab.items()}
+        ans_in_vocab_ids_list = []
+        for i in xrange(ans_vocab_size):
+            ans_in_vocab_ids_list.append(vocab[inv_ans_vocab[i]])
+
+        ans_in_vocab_ids_tensor = tf.constant(ans_in_vocab_ids_list, 
+                                              dtype=tf.int64)
+        ans_feat = tf.nn.embedding_lookup(word_vecs, ans_in_vocab_ids_tensor,
+                                          name='ans_feat')
+        return ans_feat
+
+    def compute_cosine_dist(self, feat1, feat2):
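+        # Despite the name this is the cosine *similarity* matrix: pairwise
+        # dot products of the L2-normalized rows of feat1 and feat2.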
+        feat1 = tf.nn.l2_normalize(feat1, 1)
+        feat2 = tf.nn.l2_normalize(feat2, 1)
+        return tf.matmul(feat1, tf.transpose(feat2))
+    
diff --git a/classifiers/answer_classifier/ans_data_io_helper.py b/classifiers/answer_classifier/ans_data_io_helper.py
index d5e419b4919c0fab8672ce044135fb9bc3feb550..b6b51eb123140dc5290ea4f82c4b2bea4428df62 100644
--- a/classifiers/answer_classifier/ans_data_io_helper.py
+++ b/classifiers/answer_classifier/ans_data_io_helper.py
@@ -309,11 +309,12 @@ class RelFeedDictCreator(FeedDictCreator):
 
 class AnsFeedDictCreator(FeedDictCreator):
     def __init__(self, region_images, ans_labels, parsed_q, 
-                 region_scores, keep_prob, plholder_dict, vocab):
+                 region_scores, keep_prob, plholder_dict, vocab, is_train):
         FeedDictCreator.__init__(self, region_images, parsed_q,
                                  keep_prob, plholder_dict, vocab)
         self.feed_dict[plholder_dict['gt_answer']] = ans_labels
         self.feed_dict[plholder_dict['region_score']] = region_scores
+        self.feed_dict[plholder_dict['is_train']] = is_train
 
 
 class html_ans_table_writer():
diff --git a/classifiers/answer_classifier/eval_ans_classifier_simple.py b/classifiers/answer_classifier/eval_ans_classifier_simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..d50ad5db3cc1229dcc94f67410f5660395f5105d
--- /dev/null
+++ b/classifiers/answer_classifier/eval_ans_classifier_simple.py
@@ -0,0 +1,316 @@
+import sys
+import os
+import json
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+import numpy as np
+import math
+import random
+import pdb
+import tensorflow as tf
+import tf_graph_creation_helper as graph_creator
+import ans_graph_creator
+import plot_helper as plotter
+import ans_data_io_helper as ans_io_helper
+import region_ranker.perfect_ranker as region_proposer 
+import train_ans_classifier as ans_trainer
+from PIL import Image, ImageDraw
+
+def get_pred(y, qa_anno_dict, region_anno_dict, parsed_q_dict, ans_vocab, vocab,
+             image_dir, mean_image, start_index, val_set_size, batch_size,
+             plholder_dict, img_height, img_width, batch_creator):
+
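+    # Run the answer graph over val_set_size questions starting at
+    # start_index and return one {question_id, answer} record each; the last
+    # batch is zero-padded to batch_size since the graph shapes are static.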
+    inv_ans_vocab = {v: k for k, v in ans_vocab.items()}
+    pred_list = []
+    correct = 0
+    max_iter = int(math.ceil(val_set_size*1.0/batch_size))
+    batch_size_tmp = batch_size
+    for i in xrange(max_iter):
+        if i==(max_iter-1):
+            batch_size_tmp = val_set_size - i*batch_size
+
+        print('Iter: ' + str(i+1) + '/' + str(max_iter))
+
+        region_images, ans_labels, parsed_q, \
+        region_score, partition = batch_creator \
+            .ans_mini_batch_loader(qa_anno_dict, 
+                                   region_anno_dict, 
+                                   ans_vocab, vocab, 
+                                   image_dir, mean_image, 
+                                   start_index+i*batch_size, 
+                                   batch_size_tmp, parsed_q_dict,
+                                   img_height, img_width, 3)
+            
+        if i==max_iter-1:
+                                    
+            residual_batch_size = batch_size - batch_size_tmp
+            residual_regions = residual_batch_size*ans_io_helper.num_proposals
+
+            residual_region_images = np.zeros(shape=[residual_regions,
+                                                     img_height/3, img_width/3, 
+                                                     3])
+            # residual_questions = np.zeros(shape=[residual_regions, 
+            #                                      len(vocab)])
+            
+            residual_ans_labels = np.zeros(shape=[residual_batch_size, 
+                                                  len(ans_vocab)])
+            residual_region_score = np.zeros(shape=[1, residual_regions])
+
+            region_images = np.concatenate((region_images, 
+                                            residual_region_images),
+                                           axis=0)
+#            questions = np.concatenate((questions, residual_questions), axis=0)
+            num_prop = ans_io_helper.num_proposals
+            for k in xrange(batch_size_tmp*num_prop, batch_size*num_prop):
+                parsed_q[k] = {
+                    'bin0': [''],
+                    'bin1': [''],
+                    'bin2': [''],
+                    'bin3': [''],
+                }
+
+            ans_labels = np.concatenate((ans_labels, residual_ans_labels), 
+                                        axis=0)
+            region_score = np.concatenate((region_score, residual_region_score),
+                                          axis=1)
+
+        
+        feed_dict = ans_io_helper \
+            .AnsFeedDictCreator(region_images, 
+                                ans_labels, 
+                                parsed_q,
+                                region_score,
+                                1.0, 
+                                plholder_dict,
+                                vocab, 
+                                False).feed_dict
+
+        ans_ids = np.argmax(y.eval(feed_dict), 1)
+        for j in xrange(batch_size_tmp):
+            pred_list = pred_list + [{
+                'question_id' : start_index+i*batch_size+j,
+                'answer' : inv_ans_vocab[ans_ids[j]]
+            }]
+
+    return pred_list
+
+def eval(eval_params):
+    sess = tf.InteractiveSession()
+    
+    train_anno_filename = eval_params['train_json']
+    test_anno_filename = eval_params['test_json']
+    parsed_q_filename = eval_params['parsed_q_json']
+    regions_anno_filename = eval_params['regions_json']
+    image_regions_dir = eval_params['image_regions_dir']
+    outdir = eval_params['outdir']
+    model = eval_params['model']
+    batch_size = eval_params['batch_size']
+    test_start_id = eval_params['test_start_id']
+    test_set_size = eval_params['test_set_size']
+    if not os.path.exists(outdir):
+        os.mkdir(outdir)
+
+    qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
+    qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
+    parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
+    region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
+    ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
+    vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)
+
+    # Create graph
+    g = tf.get_default_graph()
+    plholder_dict = graph_creator.placeholder_inputs_ans(len(vocab), 
+                                                         len(ans_vocab), 
+                                                         mode='gt')
+
+    image_regions = plholder_dict['image_regions']
+    questions = plholder_dict['questions']
+    keep_prob = plholder_dict['keep_prob']
+    y = plholder_dict['gt_answer']
+    region_score = plholder_dict['region_score']
+
+    y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
+    obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
+    obj_feat = obj_feat_op.outputs[0]
+    y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
+    atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
+    atr_feat = atr_feat_op.outputs[0]
+    pred_rel_score = graph_creator.rel_comp_graph(plholder_dict,
+                                                  obj_feat, atr_feat,
+                                                  y_pred_obj, y_pred_atr,
+                                                  'q_obj_atr_reg',
+                                                  1.0, len(vocab), batch_size) 
+
+    ans_graph = ans_graph_creator.ans_graph_creator(plholder_dict,
+                                                    obj_feat,
+                                                    atr_feat,
+                                                    y_pred_obj,
+                                                    y_pred_atr,
+                                                    vocab,
+                                                    inv_vocab,
+                                                    ans_vocab,
+                                                    batch_size,
+                                                    graph_creator.graph_config,
+                                                    eval_params['mode'],
+                                                    True)
+
+    y_pred = ans_graph.cosine_dist
+
+    pred_rel_score_vec = tf.reshape(pred_rel_score, 
+                                    [1, batch_size*ans_io_helper.num_proposals])
+
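+    # Note: unlike training, y_avg here aggregates with the region_score
+    # placeholder (fed from the data); pred_rel_score_vec is left unused.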
+    y_avg = graph_creator.aggregate_y_pred(y_pred, region_score, 
+                                           batch_size,  
+                                           ans_io_helper.num_proposals, 
+                                           len(ans_vocab))
+    
+    accuracy = graph_creator.evaluation(y, y_avg)
+
+    # Collect variables
+    rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
+    obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
+    atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
+
+    # Restore model
+    restorer = tf.train.Saver()
+    if os.path.exists(model):
+        restorer.restore(sess, model)
+    else:
+        raise IOError('Failed to read model from file ' + model)
+
+#    sess.run(tf.initialize_variables(vars_to_init))
+
+    mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
+                         'Obj_Classifier/mean_image.npy')
+
+    # Batch creator
+    test_batch_creator = ans_io_helper.batch_creator(test_start_id,
+                                                     test_start_id 
+                                                     + test_set_size - 1)
+    # Get predictions
+    pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict, 
+                         parsed_q_dict, ans_vocab, 
+                         vocab, image_regions_dir, mean_image, test_start_id, 
+                         test_set_size, batch_size, plholder_dict, 75, 75,
+                         test_batch_creator)
+
+    json_filename = os.path.join(outdir, 'predicted_ans_' + \
+                                 eval_params['mode'] + '.json')
+    with open(json_filename,'w') as json_file:
+        json.dump(pred_dict, json_file)
+
+    
+def create_html_file(outdir, test_anno_filename, regions_anno_filename,
+                     pred_json_filename, image_dir, num_pred_to_display, mode):
+    qa_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
+    region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
+    ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
+
+    with open(pred_json_filename,'r') as json_file:
+        raw_data = json.load(json_file)
+    
+    # Create directory for storing images with region boxes
+    images_bbox_dir = os.path.join(outdir, 'images_bbox' + '_' + mode)
+    if not os.path.exists(images_bbox_dir):
+        os.mkdir(images_bbox_dir)
+    
+    col_dict = {
+        0 : 'Question_Id',
+        1 : 'Question',
+        2 : 'Answer (GT)',
+        3 : 'Answer (Pred)',
+        4 : 'Image',
+    }
+    html_correct_filename = os.path.join(outdir, 
+                                         'correct_ans_' + mode + '.html')
+    html_writer_correct = ans_io_helper \
+        .html_ans_table_writer(html_correct_filename)
+    html_writer_correct.add_element(col_dict)
+
+    html_incorrect_filename = os.path.join(outdir, 
+                                           'incorrect_ans_' + mode + '.html')
+    html_writer_incorrect = ans_io_helper \
+        .html_ans_table_writer(html_incorrect_filename)
+    html_writer_incorrect.add_element(col_dict)
+
+    region_coords, region_coords_ = region_proposer.get_region_coords(300,300)
+    
+    random.shuffle(raw_data)
+
+    count = 0
+    for entry in raw_data:
+        if count == num_pred_to_display:
+            break
+        q_id = entry['question_id']
+        pred_ans = entry['answer']
+        gt_ans = qa_dict[q_id].answer
+        question = qa_dict[q_id].question
+        img_id = qa_dict[q_id].image_id
+        image_filename = os.path.join(image_dir, str(img_id) + '.jpg')
+        image = Image.open(image_filename)
+        
+        regions = region_proposer.rank_regions(image, question, region_coords, 
+                                               region_coords_, 
+                                               region_anno_dict[img_id],
+                                               crop=False)
+        dr = ImageDraw.Draw(image)
+        # print(q_id)
+        # print([regions[key].score for key in regions.keys()])
+        for i in xrange(ans_io_helper.num_proposals):
+            if regions[i].score != 0:
+                x1, y1, x2, y2 = regions[i].coord
+                dr.rectangle([(x1,y1),(x2,y2)], outline="red")
+        
+        image_bbox_filename = os.path.join(images_bbox_dir,str(q_id) + '.jpg')
+        image.save(image_bbox_filename)
+        image_bbox_filename_rel = 'images_bbox_'+ mode +'/'+ str(q_id) + '.jpg' 
+        col_dict = {
+            0 : q_id,
+            1 : question,
+            2 : gt_ans,
+            3 : pred_ans,
+            4 : html_writer_correct.image_tag(image_bbox_filename_rel,50,50)
+        }
+        if pred_ans==gt_ans:
+            html_writer_correct.add_element(col_dict)
+        else:
+            html_writer_incorrect.add_element(col_dict)
+
+        count += 1
+
+    html_writer_correct.close_file()
+    html_writer_incorrect.close_file()
+    
+
+if __name__=='__main__':
+    mode = 'q_obj_atr'
+    model_num = 4
+    ans_classifier_eval_params = {
+        'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
+        'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
+        'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
+        'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
+        'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
+        'image_regions_dir': '/mnt/ramdisk/image_regions',
+        'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin',
+        'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9',
+        'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin/ans_classifier_' + mode + '-' + str(model_num),
+        'mode' : mode,
+        'batch_size': 20,
+        'test_start_id': 94645,
+        'test_set_size': 143495-94645+1,
+    }
+    
+    eval(ans_classifier_eval_params)
+    outdir = ans_classifier_eval_params['outdir']
+    test_anno_filename = ans_classifier_eval_params['test_json']
+    regions_anno_filename = ans_classifier_eval_params['regions_json']
+    pred_json_filename = os.path.join(outdir, 'predicted_ans_'+ mode  +'.json')
+    image_dir = ans_classifier_eval_params['image_dir']
+    create_html_file(outdir, test_anno_filename, regions_anno_filename,
+                     pred_json_filename, image_dir, 1000, mode)
diff --git a/classifiers/answer_classifier/train_ans_classifier.py b/classifiers/answer_classifier/train_ans_classifier.py
index f57515c5e33ff8a270ea621aa058689464e09e5c..4b4f1ca8a89da827f6ca65f02794bf0bcd831823 100644
--- a/classifiers/answer_classifier/train_ans_classifier.py
+++ b/classifiers/answer_classifier/train_ans_classifier.py
@@ -36,7 +36,7 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
         'ans/fc2/W_feat',
         'ans/fc2/b_feat',
         'ans/fc2/W_ans',
-        'ans/fc2/b_ans'
+        'ans/fc2/b_ans',
     ]
 
     vars_dict = graph_creator.get_list_of_variables(list_of_vars)
@@ -237,7 +237,7 @@ def train(train_params):
                                                  obj_feat, atr_feat, 
                                                  y_pred_obj, y_pred_atr, 
                                                  vocab, inv_vocab, ans_vocab, 
-                                                 train_params['mode'])
+                                                 train_params['mode'], True)
     pred_rel_score_vec = tf.reshape(pred_rel_score, 
                                     [1, batch_size*ans_io_helper.num_proposals])
 
@@ -246,8 +248,8 @@ def train(train_params):
                                            ans_io_helper.num_proposals, 
                                            len(ans_vocab))
     
-#    cross_entropy = graph_creator.loss(y, y_avg)
-    margin_loss = graph_creator.margin_loss(y, y_avg, 0.2)
+    cross_entropy = graph_creator.loss(y, y_avg)
+    #margin_loss = graph_creator.margin_loss(y, y_avg, 1)
     accuracy = graph_creator.evaluation(y, y_avg)
     
     # Collect variables
@@ -290,7 +292,7 @@ def train(train_params):
     #              1e-5 * regularizer_ans_fcs + \
     #              1e-3 * regularizer_ans_filters
 
-    total_loss = margin_loss + \
+    total_loss = cross_entropy + \
                  1e-5 * regularizer_ans_word_vecs + \
                  1e-5 * regularizer_ans_fcs + \
                  1e-3 * regularizer_ans_filters
diff --git a/classifiers/answer_classifier/train_ans_classifier_simple.py b/classifiers/answer_classifier/train_ans_classifier_simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..b787ac53a99796b9eddd406a53989afd03fa36b5
--- /dev/null
+++ b/classifiers/answer_classifier/train_ans_classifier_simple.py
@@ -0,0 +1,356 @@
+import sys
+import os
+import json
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+import numpy as np
+import math
+import random
+import pdb
+import tensorflow as tf
+import object_classifiers.obj_data_io_helper as obj_data_loader
+import attribute_classifiers.atr_data_io_helper as atr_data_loader
+import tf_graph_creation_helper as graph_creator
+import ans_graph_creator
+import plot_helper as plotter
+import ans_data_io_helper as ans_io_helper
+import region_ranker.perfect_ranker as region_proposer 
+import time
+
+val_start_id = 89645
+val_set_size = 5000
+val_set_size_small = 500
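+# Question ids val_start_id..val_start_id+val_set_size-1 (89645..94644) are
+# held out for validation; test questions start at id 94645 (see the
+# save_regions calls below).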
+
+
+def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
+             image_dir, mean_image, start_index, val_set_size, batch_size,
+             plholder_dict, img_height, img_width, batch_creator, 
+             parsed_q_dict):
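+    # Mean accuracy over floor(val_set_size/batch_size) full batches; any
+    # remainder questions at the end of the range are skipped.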
+    
+    correct = 0
+    max_iter = int(math.floor(val_set_size/batch_size))
+    for i in xrange(max_iter):
+        region_images, ans_labels, parsed_q, \
+        region_score, partition = batch_creator \
+            .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, 
+                                   ans_vocab, vocab, image_dir, mean_image, 
+                                   start_index+i*batch_size, batch_size, 
+                                   parsed_q_dict, 
+                                   img_height, img_width, 3)
+            
+        feed_dict = ans_io_helper.\
+                    AnsFeedDictCreator(region_images, ans_labels, parsed_q, 
+                                       region_score, 1.0, plholder_dict, 
+                                       vocab, False).feed_dict
+
+        correct = correct + accuracy.eval(feed_dict)
+
+    return correct/max_iter
+
+
+def train(train_params):
+    sess = tf.InteractiveSession()
+    
+    train_anno_filename = train_params['train_json']
+    test_anno_filename = train_params['test_json']
+    parsed_q_filename = train_params['parsed_q_json']
+    regions_anno_filename = train_params['regions_json']
+    image_dir = train_params['image_dir']
+    image_regions_dir = train_params['image_regions_dir']
+    outdir = train_params['outdir']
+    rel_model = train_params['rel_model']
+    obj_atr_model = train_params['obj_atr_model']
+    batch_size = train_params['batch_size']
+
+    if not os.path.exists(outdir):
+        os.mkdir(outdir)
+
+    qa_anno_dict = ans_io_helper.parse_qa_anno(train_anno_filename)
+    parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
+    region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
+    ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
+    vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
+#    vocab = ans_io_helper.join_vocab(vocab, ans_vocab)
+
+    # Save region crops
+    if train_params['crop_n_save_regions'] == True:
+        qa_anno_dict_test = ans_io_helper.parse_qa_anno(test_anno_filename)
+        ans_io_helper.save_regions(image_dir, image_regions_dir,
+                                   qa_anno_dict, region_anno_dict,
+                                   1, 94644, 75, 75)
+        ans_io_helper.save_regions(image_dir, image_regions_dir,
+                                   qa_anno_dict_test, region_anno_dict,
+                                   94645, 143495-94645+1, 75, 75) 
+
+    # Create graph
+    g = tf.get_default_graph()
+    plholder_dict = graph_creator.placeholder_inputs_ans(len(vocab), 
+                                                         len(ans_vocab), 
+                                                         mode='gt')
+
+    image_regions = plholder_dict['image_regions']
+    questions = plholder_dict['questions']
+    keep_prob = plholder_dict['keep_prob']
+    y = plholder_dict['gt_answer']
+    region_score = plholder_dict['region_score']
+
+    y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
+    obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
+    obj_feat = obj_feat_op.outputs[0]
+    y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
+    atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
+    atr_feat = atr_feat_op.outputs[0]
+
+    pred_rel_score = graph_creator.rel_comp_graph(plholder_dict,
+                                                  obj_feat, atr_feat,
+                                                  y_pred_obj, y_pred_atr, 
+                                                  'q_obj_atr_reg_explt',
+                                                  1.0, len(vocab), 
+                                                  batch_size)
+
+    # Restore rel, obj and attribute classifier parameters
+    rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
+    obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
+    atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
+
+    obj_atr_saver = tf.train.Saver(obj_vars+atr_vars)
+    rel_saver = tf.train.Saver(rel_vars)
+
+    rel_saver.restore(sess, rel_model)
+    obj_atr_saver.restore(sess, obj_atr_model)
+
+    ans_graph = ans_graph_creator.ans_graph_creator(plholder_dict,
+                                                    obj_feat,
+                                                    atr_feat,
+                                                    y_pred_obj,
+                                                    y_pred_atr,
+                                                    vocab,
+                                                    inv_vocab,
+                                                    ans_vocab,
+                                                    batch_size,
+                                                    graph_creator.graph_config,
+                                                    train_params['mode'],
+                                                    True)
+
+    y_pred = ans_graph.cosine_dist
+
+    pred_rel_score_vec = tf.reshape(pred_rel_score, 
+                                    [1, batch_size*ans_io_helper.num_proposals])
+
+    y_avg = graph_creator.aggregate_y_pred(y_pred, 
+                                           pred_rel_score_vec, batch_size, 
+                                           ans_io_helper.num_proposals, 
+                                           len(ans_vocab))
+    
+#    cross_entropy = graph_creator.loss(y, y_avg)
+    total_loss = graph_creator.margin_loss(y, y_avg, 0.1)
+    accuracy = graph_creator.evaluation(y, y_avg)
+    
+    # Collect variables
+    ans_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
+    vars_to_regularize = tf.get_collection('regularize')
+     
+    for var in vars_to_regularize:
+        print var.name
+        total_loss += 1e-4 * tf.nn.l2_loss(var)
+
+    # Model to restore some of the weights from
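+    # Curriculum: each mode warm-starts from the checkpoint of the previous
+    # mode ('q' -> 'q_obj_atr'/'q_reg' -> 'q_obj_atr_reg'), taken at epoch
+    # train_params['start_model'].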
+    if train_params['mode']=='q':
+        partial_model = ''
+
+    elif train_params['mode']=='q_obj_atr' or \
+         train_params['mode']=='q_reg':
+        partial_model = os.path.join(outdir, 'ans_classifier_q-' + \
+                                     str(train_params['start_model']))
+
+    elif train_params['mode']=='q_obj_atr_reg':
+        partial_model = os.path.join(outdir, 'ans_classifier_q_obj_atr-' + \
+                                     str(train_params['start_model']))
+
+    # Fine-tune beginning from a previous model
+    if train_params['fine_tune']==True:
+        partial_model = os.path.join(outdir, 'ans_classifier_' + \
+                                     train_params['mode'] + '-' + \
+                                     str(train_params['start_model']))
+        start_epoch = train_params['start_model']+1
+        
+        partial_restorer = tf.train.Saver()
+    else:
+        start_epoch = 0
+        if train_params['mode']!='q':
+            partial_restorer = tf.train.Saver()
+
+    # Restore partial model
+    if os.path.exists(partial_model):
+        partial_restorer.restore(sess, partial_model)
+
+    # Save trained vars
+    model_saver = tf.train.Saver()
+    all_vars_without_optim = tf.all_variables()
+
+    # Attach optimization ops
+    word_vecs = tf.get_collection('variables','ans/word_vecs')
+    # vars_to_train = [var for var in ans_vars if 
+    #                  'ans/word_vecs' not in var.name]
+    vars_to_train = ans_vars
+    train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
+                         .minimize(total_loss, var_list = vars_to_train)
+    
+    # Initialize vars_to_init
+    all_vars = tf.all_variables()
+    optimizer_vars = [var for var in all_vars if var not in 
+                      all_vars_without_optim]
+    
+    print('Optimizer Variables: ')
+    print([var.name for var in optimizer_vars])
+    print('------------------')
+
+    if train_params['mode']=='q':
+        vars_to_init = ans_vars + optimizer_vars
+    else:
+        vars_to_init = optimizer_vars
+
+    sess.run(tf.initialize_variables(vars_to_init))
+    
+    # Load mean image
+    mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
+                         'Obj_Classifier/mean_image.npy')
+
+    # Start Training
+    max_epoch = train_params['max_epoch']
+    max_iter = 4400*2
+    val_acc_array_epoch = np.zeros([max_epoch])
+    train_acc_array_epoch = np.zeros([max_epoch])
+
+    # Batch creators
+    train_batch_creator = ans_io_helper.batch_creator(1, max_iter*batch_size)
+    val_batch_creator = ans_io_helper.batch_creator(val_start_id, val_start_id 
+                                                    + val_set_size - 1)
+    val_small_batch_creator = ans_io_helper.batch_creator(val_start_id, 
+                                                          val_start_id + 
+                                                          val_set_size_small-1)
+
+    # Check accuracy of restored model
+    # if train_params['fine_tune']==True:
+        # restored_accuracy = evaluate(accuracy, qa_anno_dict, 
+        #                              region_anno_dict, ans_vocab, 
+        #                              vocab, image_regions_dir, 
+        #                              mean_image, val_start_id, 
+        #                              val_set_size, batch_size,
+        #                              plholder_dict, 75, 75,
+        #                              val_batch_creator,
+        #                              parsed_q_dict)
+        # print('Accuracy of restored model: ' + str(restored_accuracy))
+    
+    # Accuracy filename
+    train_accuracy_txtfile = os.path.join(outdir,'train_accuracy_' + \
+                                          train_params['mode'] + '.txt')
+    val_accuracy_txtfile = os.path.join(outdir,'val_accuracy_' + \
+                                        train_params['mode'] + '.txt')
+
+    for epoch in range(start_epoch, max_epoch):
+        train_batch_creator.shuffle_ids()
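+        # One epoch = max_iter shuffled mini-batches; a small validation
+        # check runs every 500 iterations.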
+        for i in range(max_iter):
+            train_region_images, train_ans_labels, train_parsed_q, \
+            train_region_score, train_partition = train_batch_creator \
+                .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, 
+                                       ans_vocab, vocab, 
+                                       image_regions_dir, mean_image, 
+                                       1+i*batch_size, batch_size,
+                                       parsed_q_dict,
+                                       75, 75, 3)
+
+            feed_dict_train = ans_io_helper \
+                .AnsFeedDictCreator(train_region_images, 
+                                    train_ans_labels, 
+                                    train_parsed_q,
+                                    train_region_score,
+                                    0.5, 
+                                    plholder_dict,
+                                    vocab,
+                                    True).feed_dict            
+
+            _, current_train_batch_acc, y_avg_eval, loss_eval = \
+                    sess.run([train_step, accuracy, y_avg, total_loss], 
+                             feed_dict=feed_dict_train)
+                
+            # print(y_avg_eval[0,:])
+            # print(train_ans_labels[0,:])
+
+#            rel_logits = g.get_operation_by_name('rel/fc2/vec_logits')
+#            print(rel_logits.outputs[0].eval(feed_dict_train))
+#            print (pred_rel_score.eval(feed_dict_train))
+
+            assert (not np.any(np.isnan(y_avg_eval))), 'NaN predicted'
+
+            train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + \
+                                           current_train_batch_acc
+        
+            if (i+1)%500==0:
+                val_accuracy = evaluate(accuracy, qa_anno_dict, 
+                                        region_anno_dict, ans_vocab, vocab,
+                                        image_regions_dir, mean_image, 
+                                        val_start_id, val_set_size_small,
+                                        batch_size, plholder_dict, 75, 75,
+                                        val_small_batch_creator,
+                                        parsed_q_dict)
+                
+                print('Iter: ' + str(i+1) + ' Val Sm Acc: ' + str(val_accuracy))
+
+        train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] / max_iter
+        val_acc_array_epoch[epoch] = evaluate(accuracy, qa_anno_dict, 
+                                              region_anno_dict, ans_vocab, 
+                                              vocab, image_regions_dir, 
+                                              mean_image, val_start_id, 
+                                              val_set_size, batch_size,
+                                              plholder_dict, 75, 75,
+                                              val_batch_creator,
+                                              parsed_q_dict)
+
+        print('Val Acc: ' + str(val_acc_array_epoch[epoch]) + 
+              ' Train Acc: ' + str(train_acc_array_epoch[epoch]))
+        
+        
+        if train_params['fine_tune']==True:
+            plot_path  = os.path.join(outdir, 'acc_vs_epoch_' \
+                                + train_params['mode'] + '_fine_tuned.pdf')
+        else:
+            plot_path = os.path.join(outdir, 'acc_vs_epoch_' \
+                                + train_params['mode'] + '.pdf')
+
+        plotter.write_accuracy_to_file(start_epoch, epoch, 
+                                       train_acc_array_epoch,
+                                       train_params['fine_tune'],
+                                       train_accuracy_txtfile)
+        plotter.write_accuracy_to_file(start_epoch, epoch, 
+                                       val_acc_array_epoch,
+                                       train_params['fine_tune'],
+                                       val_accuracy_txtfile)
+        plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1,
+                                ydata_train=train_acc_array_epoch[0:epoch + 1], 
+                                ydata_val=val_acc_array_epoch[0:epoch + 1], 
+                                xlim=[1, max_epoch], ylim=[0, 1.0], 
+                                savePath=plot_path)
+
+        save_path = model_saver.save(sess, 
+                                     os.path.join(outdir, 'ans_classifier_' + \
+                                     train_params['mode']), global_step=epoch)
+
+    sess.close()
+    tf.reset_default_graph()
+    
+if __name__=='__main__':
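+    # train() expects a train_params dict with keys: train_json, test_json,
+    # parsed_q_json, regions_json, image_dir, image_regions_dir, outdir,
+    # rel_model, obj_atr_model, batch_size, crop_n_save_regions, mode,
+    # fine_tune, start_model, adam_lr and max_epoch.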
+    print 'Hello'
diff --git a/classifiers/region_ranker/train_rel_classifier.py b/classifiers/region_ranker/train_rel_classifier.py
index a7f593174f8a6772d0b62dcb41e952f356067643..6ae315b94099acbba0b66229ab17458d1aafe20e 100644
--- a/classifiers/region_ranker/train_rel_classifier.py
+++ b/classifiers/region_ranker/train_rel_classifier.py
@@ -67,6 +67,7 @@ def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict,
                                 vocab).feed_dict
 
         region_score_pred_eval = region_score_pred.eval(feed_dict)
+        print region_score_pred_eval
     
         recall_at_k += batch_recall(region_score_pred_eval, 
                                     region_scores, -1)
diff --git a/classifiers/region_ranker/train_rel_classifier_simple.py b/classifiers/region_ranker/train_rel_classifier_simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ae315b94099acbba0b66229ab17458d1aafe20e
--- /dev/null
+++ b/classifiers/region_ranker/train_rel_classifier_simple.py
@@ -0,0 +1,351 @@
+import sys
+import os
+import json
+import math
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+import numpy as np
+import pdb
+import tensorflow as tf
+import answer_classifier.ans_data_io_helper as ans_io_helper
+import region_ranker.perfect_ranker as region_proposer
+import tf_graph_creation_helper as graph_creator
+import plot_helper as plotter
+
+val_start_id = 89645
+val_set_size = 5000
+val_set_size_small = 500
+
+def recall(pred_scores, gt_scores, k):
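+    # Recall@k: fraction of relevant regions (nonzero gt score) that appear
+    # among the top-k predicted scores; the epsilon guards empty gt.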
+    inc_order = np.argsort(pred_scores, 0)
+    dec_order = inc_order[::-1]
+    gt_scores_ordered = gt_scores[dec_order]
+    rel_reg_recalled = np.sum(gt_scores_ordered[0:k]!=0)
+    rel_reg = np.sum(gt_scores!=0)
+    return rel_reg_recalled/(rel_reg+0.00001)
+
+
+def batch_recall(pred_scores, gt_scores, k):
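+    # k=-1 uses a per-example k equal to that example's number of relevant
+    # regions (recall at the ground-truth count).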
+    batch_size = pred_scores.shape[0]
+    mean_recall = 0.0
+    for i in xrange(batch_size):
+        if k==-1:
+            k_ = np.sum(gt_scores[i,:]!=0)
+        else:
+            k_ = k
+        mean_recall += recall(pred_scores[i,:], gt_scores[i,:], k_)
+
+    mean_recall = mean_recall/batch_size
+
+    return mean_recall
+
+def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict,
+             ans_vocab, vocab, image_dir, mean_image, start_index, val_set_size,
+             batch_size, plholder_dict, img_height, img_width, batch_creator, 
+             verbose=False):
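+    # Mean recall (at the ground-truth relevant-region count) over
+    # floor(val_set_size/batch_size) batches.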
+    
+    recall_at_k = 0
+    max_iter = int(math.floor(val_set_size/batch_size))
+    for i in xrange(max_iter):
+        if verbose==True:
+            print('Iter: ' + str(i+1) + '/' + str(max_iter))
+        region_images, ans_labels, parsed_q, \
+        region_scores_vec, partition = batch_creator \
+            .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, 
+                                   ans_vocab, vocab, image_dir, mean_image, 
+                                   start_index+i*batch_size, batch_size, 
+                                   parsed_q_dict,
+                                   img_height, img_width, 3)
+        region_scores = batch_creator.reshape_score(region_scores_vec)
+
+        feed_dict = ans_io_helper \
+            .RelFeedDictCreator(region_images, 
+                                parsed_q,
+                                region_scores,
+                                1.0, 
+                                plholder_dict,
+                                vocab).feed_dict
+
+        region_score_pred_eval = region_score_pred.eval(feed_dict)
+        if verbose==True:
+            print region_score_pred_eval
+    
+        recall_at_k += batch_recall(region_score_pred_eval, 
+                                    region_scores, -1)
+        
+    recall_at_k /= max_iter
+
+    return recall_at_k
+
+
+def train(train_params):
+    sess = tf.InteractiveSession()
+    train_anno_filename = train_params['train_json']
+    test_anno_filename = train_params['test_json']
+    parsed_q_filename = train_params['parsed_q_json']
+    regions_anno_filename = train_params['regions_json']
+    image_dir = train_params['image_dir']
+    image_regions_dir = train_params['image_regions_dir']
+    outdir = train_params['outdir']
+    batch_size = train_params['batch_size']
+    obj_atr_model = train_params['obj_atr_model']
+    mode = train_params['mode']
+
+    if not os.path.exists(outdir):
+        os.mkdir(outdir)
+
+    qa_anno_dict = ans_io_helper.parse_qa_anno(train_anno_filename)
+    parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
+    region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
+    ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
+    vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
+
+    # Save region crops
+    if train_params['crop_n_save_regions'] == True:
+        qa_anno_dict_test = ans_io_helper.parse_qa_anno(test_anno_filename)
+        ans_io_helper.save_regions(image_dir, image_regions_dir,
+                                   qa_anno_dict, region_anno_dict,
+                                   1, 94644, 75, 75)
+        ans_io_helper.save_regions(image_dir, image_regions_dir,
+                                   qa_anno_dict_test, region_anno_dict,
+                                   94645, 143495-94645+1, 75, 75)
+
+    
+    # Create graph
+    g = tf.get_default_graph()
+    plholder_dict = \
+        graph_creator.placeholder_inputs_rel(ans_io_helper.num_proposals,
+                                             len(vocab), mode='gt')
+    image_regions = plholder_dict['image_regions']
+    y = plholder_dict['gt_scores']
+    keep_prob = plholder_dict['keep_prob']
+
+    y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
+    obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
+    obj_feat = obj_feat_op.outputs[0]
+    y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
+    atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
+    atr_feat = atr_feat_op.outputs[0]
+    y_pred = graph_creator.rel_comp_graph(plholder_dict,
+                                          obj_feat, atr_feat,
+                                          y_pred_obj, y_pred_atr, mode,
+                                          keep_prob, len(vocab), batch_size)
+
+    accuracy = graph_creator.evaluation(y, y_pred)
+    
+    cross_entropy = graph_creator.loss(y, y_pred)
+
+    # Collect variables
+    params_varnames = [
+        'rel/word_embed/word_vecs',
+        'rel/conv1/W',
+        'rel/conv2/W',
+        'rel/conv1/b',
+        'rel/conv2/b',
+        'rel/fc1/W_reg',
+        'rel/fc1/W_q',
+        'rel/fc1/W_obj',
+        'rel/fc1/W_atr',
+        'rel/fc1/W_explt',
+        'rel/fc1/b',
+        'rel/fc2/W',
+        'rel/fc2/b',
+    ]
+
+    vars_dict = graph_creator.get_list_of_variables(params_varnames)
+
+    # parameters grouped together
+    rel_word_params = [
+        vars_dict['rel/word_embed/word_vecs'],
+    ]
+    
+    rel_conv_params = [
+        vars_dict['rel/conv1/W'],
+        vars_dict['rel/conv2/W'],
+    ]
+ 
+    rel_fc_params = [
+        vars_dict['rel/fc1/W_reg'],
+        vars_dict['rel/fc1/W_q'],
+        vars_dict['rel/fc1/W_obj'],
+        vars_dict['rel/fc1/W_atr'],
+        vars_dict['rel/fc1/W_explt'],
+        vars_dict['rel/fc2/W'],
+    ]
+
+    # Regularization
+    regularizer_rel_word_vecs = graph_creator.regularize_params(rel_word_params)
+    regularizer_rel_filters = graph_creator.regularize_params(rel_conv_params)
+    regularizer_rel_fcs = graph_creator.regularize_params(rel_fc_params)
+    
+    total_loss = cross_entropy + \
+                 1e-4 * regularizer_rel_word_vecs + \
+                 1e-3 * regularizer_rel_filters + \
+                 1e-4 * regularizer_rel_fcs
+
+    # Restore weights
+    obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
+    atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
+    rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
+
+    vars_to_save = rel_vars + atr_vars + obj_vars
+    vars_to_train = rel_vars[:]
+    pretrained_vars = atr_vars + obj_vars
+
+    # Model to save and restore weights from
+    model_saver = tf.train.Saver(vars_to_save)
+    
+    if train_params['fine_tune']==True:
+        pretrained_model = os.path.join(outdir, 'rel_classifier_' + mode +'-'+ \
+                                        str(train_params['start_model']))
+        assert (os.path.exists(pretrained_model)), \
+            'Pretrained model does not exist'
+        model_saver.restore(sess, pretrained_model)
+        pretrained_vars = vars_to_save[:]
+        start_epoch = train_params['start_model'] + 1
+    else:
+        assert (os.path.exists(obj_atr_model)), \
+            'Obj_Atr model does not exist'
+        obj_atr_restorer = tf.train.Saver(pretrained_vars)
+        obj_atr_restorer.restore(sess, obj_atr_model)
+        start_epoch = 0
+
+    # Attach optimization ops
+    train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
+                         .minimize(total_loss, var_list=vars_to_train)
+
+    # Initialize uninitialized vars
+    all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
+    vars_to_init = [var for var in all_vars if var not in pretrained_vars]
+    sess.run(tf.initialize_variables(vars_to_init))
+
+    print('-----------------')
+    print 'Variables to train:'
+    print [var.name for var in vars_to_train]
+    print('-----------------')
+    print 'Pretrained variables:'
+    print [var.name for var in pretrained_vars]
+    print('-----------------')
+    print 'Variables to initialize:'
+    print [var.name for var in vars_to_init]
+    print('-----------------')
+    print 'Variables to save'
+    print [var.name for var in vars_to_save]
+    print('-----------------')
+
+    # Load mean image
+    mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
+                         'Obj_Classifier/mean_image.npy')
+
+    # Start Training
+    max_epoch = train_params['max_epoch']
+    max_iter = 4400*2
+    val_rec_array_epoch = np.zeros([max_epoch])
+    train_rec_array_epoch = np.zeros([max_epoch])
+
+    # Batch creators
+    train_batch_creator = ans_io_helper.batch_creator(1, max_iter*batch_size)
+    val_batch_creator = ans_io_helper.batch_creator(val_start_id, val_start_id 
+                                                    + val_set_size - 1)
+    val_small_batch_creator = ans_io_helper.batch_creator(val_start_id, 
+                                                          val_start_id + 
+                                                          val_set_size_small-1)
+
+    # Check accuracy of restored model
+    if train_params['fine_tune']==True:
+        restored_recall = evaluate(y_pred, qa_anno_dict, region_anno_dict, 
+                                   parsed_q_dict, ans_vocab, 
+                                   vocab, image_regions_dir, 
+                                   mean_image, val_start_id, 
+                                   val_set_size, batch_size,
+                                   plholder_dict, 75, 75,
+                                   val_batch_creator)
+        print('Recall of restored model: ' + str(restored_recall))
+    
+    # Accuracy filename
+    train_recall_txtfile = os.path.join(outdir,'train_recall_'+ mode +'.txt')
+    val_recall_txtfile = os.path.join(outdir,'val_recall_'+ mode +'.txt')
+                 
+    for epoch in range(start_epoch, max_epoch):
+        train_batch_creator.shuffle_ids()
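+        # One epoch = max_iter shuffled mini-batches, with a small validation
+        # recall check every 500 iterations.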
+        for i in range(max_iter):
+        
+            train_region_images, train_ans_labels, train_parsed_q, \
+            train_region_score_vec, train_partition = train_batch_creator \
+                .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, 
+                                       ans_vocab, vocab, 
+                                       image_regions_dir, mean_image, 
+                                       1+i*batch_size, batch_size,
+                                       parsed_q_dict,
+                                       75, 75, 3)
+
+            train_region_score = train_batch_creator \
+                .reshape_score(train_region_score_vec)
+
+            feed_dict_train = ans_io_helper \
+                .RelFeedDictCreator(train_region_images, 
+                                    train_parsed_q,
+                                    train_region_score,
+                                    0.5, 
+                                    plholder_dict,
+                                    vocab).feed_dict
+
+            _, current_train_batch_acc, y_pred_eval, loss_eval = \
+                    sess.run([train_step, accuracy, y_pred, total_loss], 
+                             feed_dict=feed_dict_train)
+
+            assert (not np.any(np.isnan(y_pred_eval))), 'NaN predicted'
+
+            train_rec_array_epoch[epoch] = train_rec_array_epoch[epoch] + \
+                                           batch_recall(y_pred_eval, 
+                                                        train_region_score, -1)
+        
+            if (i+1)%500==0:
+                val_recall = evaluate(y_pred, qa_anno_dict, region_anno_dict, 
+                                      parsed_q_dict, ans_vocab, vocab,
+                                      image_regions_dir, mean_image, 
+                                      val_start_id, val_set_size_small,
+                                      batch_size, plholder_dict, 75, 75,
+                                      val_small_batch_creator)
+                
+                print('Iter: ' + str(i+1) + ' Val Sm Rec: ' + str(val_recall))
+
+        train_rec_array_epoch[epoch] = train_rec_array_epoch[epoch] / max_iter
+        val_rec_array_epoch[epoch] = evaluate(y_pred, qa_anno_dict, 
+                                              region_anno_dict, parsed_q_dict, 
+                                              ans_vocab, vocab, 
+                                              image_regions_dir, mean_image, 
+                                              val_start_id, val_set_size, 
+                                              batch_size, plholder_dict, 
+                                              75, 75, val_batch_creator)
+
+        print('Val Rec: ' + str(val_rec_array_epoch[epoch]) + 
+              ' Train Rec: ' + str(train_rec_array_epoch[epoch]))
+        
+        
+        plotter.write_accuracy_to_file(start_epoch, epoch, 
+                                       train_rec_array_epoch,
+                                       train_params['fine_tune'],
+                                       train_recall_txtfile)
+        plotter.write_accuracy_to_file(start_epoch, epoch, 
+                                       val_rec_array_epoch,
+                                       train_params['fine_tune'],
+                                       val_recall_txtfile)
+      
+        save_path = model_saver.save(sess, 
+                                     os.path.join(outdir, 'rel_classifier_' + \
+                                                  mode), 
+                                     global_step=epoch)
+
+    sess.close()
+    tf.reset_default_graph()
+
+
diff --git a/classifiers/rel_graph_creator.py b/classifiers/rel_graph_creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..91fe8e5aa60c5087191be0e82b61b8db99ef2d20
--- /dev/null
+++ b/classifiers/rel_graph_creator.py
@@ -0,0 +1,172 @@
+import numpy as np
+import math
+import pdb
+import tensorflow as tf
+import answer_classifier.ans_data_io_helper as ans_io_helper
+from tf_graph_creation_helper import weight_variable, bias_variable, \
+    q_bin_embed_graph, conv2d, max_pool_2x2, batchnorm
+
+
+class ans_graph_creator():
+    def __init__(self, 
+                 plholder_dict,
+                 obj_feat, 
+                 atr_feat, 
+                 obj_prob, 
+                 atr_prob,
+                 vocab, 
+                 inv_vocab, 
+                 ans_vocab,
+                 batch_size,
+                 graph_config,
+                 mode='q_obj_atr', 
+                 is_train=True):
+        
+        self.mode = mode
+        self.is_train = plholder_dict['is_train']
+        self.keep_prob = plholder_dict['keep_prob']
+        image_regions = plholder_dict['image_regions']
+        vocab_size = len(vocab)
+
+        with tf.name_scope('ans') as ans_graph:
+            # Word Vectors
+            word_vecs = self.create_word_vecs(vocab_size, 
+                                              graph_config['word_vec_dim'])
+
+            # Feature Computations
+            q_feat = self.add_q_feat_comp_layer(word_vecs, plholder_dict)
+            reg_feat = self.add_reg_feat_comp_layer(image_regions)
+
+            # Feature Projections (with batch norm)
+            feat_proj_dim = graph_config['joint_embed_dim']
+            proj_feat = dict()
+
+            proj_feat['q'] = self.fc_layer(q_feat, feat_proj_dim,
+                                           'q_feat_proj_layer')
+        
+            proj_feat['reg'] = self.fc_layer(reg_feat, feat_proj_dim,
+                                             'reg_feat_proj_layer')
+
+            proj_feat['obj'] = self.fc_layer(obj_feat, feat_proj_dim,
+                                             'obj_feat_proj_layer')
+            
+            proj_feat['atr'] = self.fc_layer(atr_feat, feat_proj_dim,
+                                             'atr_feat_proj_layer') 
+        
+            # Feature Combination
+            coeffs = self.mixing_coeffs()
+            print(coeffs)
+            num_regions = batch_size*ans_io_helper.num_proposals
+            comb_feat = tf.zeros(shape=[num_regions, feat_proj_dim], 
+                                 dtype=tf.float32)
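+            # Weighted sum of the projected features; mixing_coeffs gives
+            # equal weight to each feature type named in self.mode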
+            for feat_type, feat in proj_feat.items():
+                comb_feat = comb_feat + feat * coeffs[feat_type]
+                
+            # Answer feature
+            ans_feat = self.compute_ans_feat(word_vecs, vocab, ans_vocab)
+
+            # Proj answer
+            proj_ans_feat = self.fc_layer(ans_feat, feat_proj_dim,
+                                          'ans_feat_proj_layer')
+
+            # Compute Cosine Distance
+            self.cosine_dist = self.compute_cosine_dist(comb_feat, 
+                                                        proj_ans_feat)
+                
+    def create_word_vecs(self, vocab_size, word_vec_dim):
+        word_vecs = weight_variable([vocab_size,
+                                     word_vec_dim],
+                                    var_name='word_vecs')
+        word_vecs = tf.nn.l2_normalize(word_vecs, 1)
+        tf.add_to_collection('regularize', word_vecs)
+        return word_vecs
+        
+    def add_q_feat_comp_layer(self, word_vecs, plholder_dict):
+        with tf.name_scope('q_feat_comp_layer') as q_feat_comp_layer:
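+            # Embed each of the 4 question bins separately and concatenate
+            # them into a single question feature vector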
+            bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict)
+            bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict)
+            bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict)
+            bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict)
+            q_feat = tf.concat(1, 
+                               [bin0_embed, bin1_embed, bin2_embed, bin3_embed],
+                               name='q_feat')
+        return q_feat
+
+    def add_reg_feat_comp_layer(self, image_regions):
+        with tf.name_scope('reg_feat_comp_layer') as reg_feat_comp_layer:
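+            # Two conv/pool/dropout blocks over the region crops, flattened
+            # into one feature vector per region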
+            with tf.name_scope('conv1') as conv1:
+                W_conv1 = weight_variable([5,5,3,4])
+                b_conv1 = bias_variable([4])
+                a_conv1 = tf.add(conv2d(image_regions, W_conv1), 
+                                 b_conv1, name='a')
+                h_conv1 = tf.nn.relu(a_conv1, name='h')
+                h_pool1 = max_pool_2x2(h_conv1)
+                # Dropout on the pooled conv1 output; consumed by conv2 below
+                h_pool1_drop = tf.nn.dropout(h_pool1, self.keep_prob,
+                                             name='h_pool_drop')
+
+            with tf.name_scope('conv2') as conv2:
+                W_conv2 = weight_variable([3,3,4,8])
+                b_conv2 = bias_variable([8])
+                a_conv2 = tf.add(conv2d(h_pool1_drop, W_conv2), b_conv2,
+                                 name='a')
+                h_conv2 = tf.nn.relu(a_conv2, name='h')
+                h_pool2 = max_pool_2x2(h_conv2)
+                h_pool2_drop = tf.nn.dropout(h_pool2, self.keep_prob, 
+                                             name='h_pool_drop')
+                h_pool2_drop_shape = h_pool2_drop.get_shape()
+                reg_feat_dim = reduce(lambda f, g: f*g, 
+                                      [dim.value for dim in 
+                                       h_pool2_drop_shape[1:]])
+                reg_feat = tf.reshape(h_pool2_drop, [-1, reg_feat_dim], 
+                                      name='reg_feat')
+
+            tf.add_to_collection('regularize', W_conv1)
+            tf.add_to_collection('regularize', W_conv2)
+        
+        return reg_feat
+
+    def fc_layer(self, feat, proj_dim, name_scope):
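+        # Projection head: linear -> batchnorm -> relu -> linear, with both
+        # weight matrices added to the 'regularize' collection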
+        with tf.name_scope(name_scope) as fc_layer:
+            feat_dim = feat.get_shape()[1].value
+            W1 = weight_variable([feat_dim, proj_dim])
+            b1 = bias_variable([proj_dim])
+            proj_feat = tf.add(tf.matmul(feat, W1), b1)
+            bn_proj_feat = batchnorm(proj_feat, None, self.is_train)
+            W2 = weight_variable([proj_dim, proj_dim])
+            b2 = bias_variable([proj_dim])
+            bn_proj_feat = tf.add(tf.matmul(tf.nn.relu(bn_proj_feat), W2), b2)
+        tf.add_to_collection('regularize', W1)
+        tf.add_to_collection('regularize', W2)
+
+        return bn_proj_feat
+        
+    def mixing_coeffs(self):
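+        # Uniform coefficients over the feature types present in self.mode,
+        # e.g. mode 'q_obj_atr' weights q, obj, atr by 1/3 each and reg by 0;
+        # assumes the mode names at least one feature type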
+        feat_types = ['q', 'obj', 'atr', 'reg']
+        coeffs = dict()
+        count = 0
+        for feat_type in feat_types:
+            if feat_type in self.mode:
+                coeffs[feat_type] = 1.0
+                count += 1
+            else:
+                coeffs[feat_type] = 0.0
+        coeffs = {k: v/count for k, v in coeffs.items()}
+        return coeffs
+          
+    def compute_ans_feat(self, word_vecs, vocab, ans_vocab):
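+        # Represent each answer by the word vector of its vocab entry;
+        # assumes every answer is a single word present in the question vocab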
+        ans_vocab_size = len(ans_vocab)
+        inv_ans_vocab = {v:k for k, v in ans_vocab.items()}
+        ans_in_vocab_ids_list = []
+        for i in xrange(ans_vocab_size):
+            ans_in_vocab_ids_list.append(vocab[inv_ans_vocab[i]])
+
+        ans_in_vocab_ids_tensor = tf.constant(ans_in_vocab_ids_list, 
+                                              dtype=tf.int64)
+        ans_feat = tf.nn.embedding_lookup(word_vecs, ans_in_vocab_ids_tensor,
+                                          name='ans_feat')
+        return ans_feat
+
+    def compute_cosine_dist(self, feat1, feat2):
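+        # Rows are l2-normalized, so the matmul yields the pairwise cosine
+        # similarity between every combined feature and every answer feature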
+        feat1 = tf.nn.l2_normalize(feat1, 1)
+        feat2 = tf.nn.l2_normalize(feat2, 1)
+        return tf.matmul(feat1, tf.transpose(feat2))
+    
diff --git a/classifiers/tf_graph_creation_helper.py b/classifiers/tf_graph_creation_helper.py
index 0a8c93f49cf937ba4a71265f20c38c2ee30f2182..9b2d36b1c732340ac0349bbf60881edeea933c32 100644
--- a/classifiers/tf_graph_creation_helper.py
+++ b/classifiers/tf_graph_creation_helper.py
@@ -14,6 +14,7 @@ graph_config = {
     'q_embed_dim': 200,
     'ans_fc1_dim': 300,
     'rel_fc1_dim': 100,
+    'joint_embed_dim': 100,  # dimension of the joint feature/answer space
 }
 
 def get_variable(var_scope):
@@ -108,6 +109,8 @@ def placeholder_inputs_ans(total_vocab_size, ans_vocab_size, mode='gt'):
                                     'questions'),
         'region_score': tf.placeholder(tf.float32, [1,None], 
                                        'region_score'),
+
+        'is_train': tf.placeholder(tf.bool, [], 'is_train')  # selects batchnorm branch
     }
     for i in xrange(4):
         bin_name = 'bin' + str(i)
@@ -486,7 +490,7 @@ def ans_comp_graph(plholder_dict, obj_feat, atr_feat,
 
 
 def ans_comp_margin_graph(plholder_dict, obj_feat, atr_feat, obj_prob, atr_prob,
-                          vocab, inv_vocab, ans_vocab, mode):
+                          vocab, inv_vocab, ans_vocab, mode, train):
     vocab_size = len(vocab)
     image_regions = plholder_dict['image_regions']
     keep_prob = plholder_dict['keep_prob']
@@ -582,6 +586,13 @@ def ans_comp_margin_graph(plholder_dict, obj_feat, atr_feat, obj_prob, atr_prob,
             a_fc1_q = tf.matmul(q_feat, W_q_fc1, name='a_fc1_q')
             a_explt_fc1 = tf.matmul(concat_explt_feat, W_explt_fc1,
                                     name='a_explt_fc1')        
+
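+            # Batch-normalize each branch so their scales are comparable
+            # before the weighted combination below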
+            a_fc1_region = batchnorm(a_fc1_region, 'reg', train)
+            a_fc1_obj = batchnorm(a_fc1_obj, 'obj', train)
+            a_fc1_atr = batchnorm(a_fc1_atr, 'atr', train)
+            a_fc1_q = batchnorm(a_fc1_q, 'q', train)
+            a_explt_fc1 = batchnorm(a_explt_fc1, 'explt', train)
+        
             coeff_reg = 0.0
             coeff_obj = 0.0
             coeff_atr = 0.0
@@ -611,7 +622,8 @@ def ans_comp_margin_graph(plholder_dict, obj_feat, atr_feat, obj_prob, atr_prob,
             a_fc1 = coeff_reg * a_fc1_region + \
                     coeff_obj * a_fc1_obj + \
                     coeff_atr * a_fc1_atr + \
-                    coeff_q * a_fc1_q
+                    coeff_q * a_fc1_q + \
+                    coeff_explt * a_explt_fc1
             
             h_fc1 = tf.nn.relu(a_fc1, name='h')
             h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
@@ -631,12 +643,17 @@ def ans_comp_margin_graph(plholder_dict, obj_feat, atr_feat, obj_prob, atr_prob,
                                      b_feat_fc2, 
                                      name='comb_feat_embed')
             comb_ans_embed = tf.add(tf.matmul(ans_embed, W_ans_fc2), 
-                                     b_ans_fc2, 
-                                     name='comb_feat_embed')
+                                    b_ans_fc2, 
+                                    name='comb_ans_embed')
+            comb_feat_embed = batchnorm(comb_feat_embed, 'feat_embed', train)
+            comb_ans_embed = batchnorm(comb_ans_embed, 'ans_embed', train)
+        comb_feat_embed = tf.nn.l2_normalize(comb_feat_embed, 1)
+        comb_ans_embed = tf.nn.l2_normalize(comb_ans_embed, 1)
         ans_scores = tf.matmul(comb_feat_embed, tf.transpose(comb_ans_embed), 
                                name='ans_scores')
-        ans_scores = tf.nn.l2_normalize(ans_scores, 1)*3.0
+        #ans_scores = tf.nn.l2_normalize(ans_scores, 1)*3.0
         return tf.nn.softmax(ans_scores)
+        
 
 
 def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals, 
@@ -669,7 +686,7 @@ def loss(y, y_pred):
 def margin_loss(y, y_pred, margin):
     correct_score = tf.reduce_sum(tf.mul(y, y_pred), 1, 
                                   keep_dims=True, name='correct_score')
-    return tf.reduce_mean(tf.maximum(0.0, y + margin - correct_score))
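+    # Hinge on every class: scores must trail the correct-class score by at
+    # least `margin` (the correct-class term reduces to the constant margin)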
+    return tf.reduce_mean(tf.maximum(0.0, y_pred + margin - correct_score))
 
 
 def regularize_params(param_list):
@@ -679,6 +696,40 @@ def regularize_params(param_list):
     return regularizer
 
 
+def batchnorm(input, suffix, is_train, decay=0.95, epsilon=1e-4, name='bn'):
+    rank = len(input.get_shape().as_list())
+    in_dim = input.get_shape().as_list()[-1]
+
+    # Normalize over every axis except the channel axis
+    if rank == 2:
+        axes = [0]
+    elif rank == 4:
+        axes = [0, 1, 2]
+    else:
+        raise ValueError('Input tensor must have rank 2 or 4.')
+
+    if suffix:
+        suffix = '_' + suffix
+    else:
+        suffix = ''
+
+    mean, variance = tf.nn.moments(input, axes)
+    offset = tf.Variable(initial_value=tf.constant(value=0.0, shape=[in_dim]),
+                         name='offset' + suffix)
+    scale = tf.Variable(initial_value=tf.constant(value=1.0, shape=[in_dim]),
+                        name='scale' + suffix)
+
+    # Moving averages of the batch statistics, used in place of the
+    # per-batch mean/variance at test time
+    ema = tf.train.ExponentialMovingAverage(decay=decay)
+    ema_apply_op = ema.apply([mean, variance])
+    ema_mean, ema_var = ema.average(mean), ema.average(variance)
+
+    # The training branch updates the moving averages as a side effect
+    with tf.control_dependencies([ema_apply_op]):
+        bn_train = tf.nn.batch_normalization(input, mean, variance,
+                                             offset, scale, epsilon, name)
+    bn_test = tf.nn.batch_normalization(input, ema_mean, ema_var,
+                                        offset, scale, epsilon, name)
+    return tf.cond(is_train, lambda: bn_train, lambda: bn_test)
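+# A minimal usage sketch (hypothetical names; `x` is a [batch, dim] tensor
+# and the bool placeholder comes from placeholder_inputs_ans above):
+#     x_bn = batchnorm(x, 'fc1', plholder_dict['is_train'])
+#     sess.run(x_bn, feed_dict={plholder_dict['is_train']: True})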
+
+
 if __name__ == '__main__':
     lg_dir = '/home/tanmay/Code/GenVQA/Exp_Results/lg_files/'
 
diff --git a/classifiers/train_classifiers.py b/classifiers/train_classifiers.py
index b8972ded3d2aada441c8e0f3a96ea759a0e1835f..46c1acd00521f8ab9bc6ed0e8ec6585a5d033cdc 100644
--- a/classifiers/train_classifiers.py
+++ b/classifiers/train_classifiers.py
@@ -9,7 +10,8 @@ import object_classifiers.train_obj_classifier as obj_trainer
 import object_classifiers.eval_obj_classifier as obj_evaluator
 import attribute_classifiers.train_atr_classifier as atr_trainer
 import attribute_classifiers.eval_atr_classifier as atr_evaluator
-import answer_classifier.train_ans_classifier as ans_trainer
+#import answer_classifier.train_ans_classifier as ans_trainer
+import answer_classifier.train_ans_classifier_simple as ans_trainer
 import region_ranker.train_rel_classifier as rel_trainer
 import region_ranker.eval_rel_classifier as rel_evaluator
 
@@ -108,10 +110,10 @@ ans_classifier_train_params = {
     'adam_lr' : 0.0001,
     'mode' : 'q_obj_atr',
     'crop_n_save_regions': False,
-    'max_epoch': 10,
+    'max_epoch': 5,
     'batch_size': 10,
     'fine_tune': True,
-    'start_model': 4, # When fine_tune is false used to pre-initialize q_obj_atr with q model etc
+    'start_model': 1, # when fine_tune is False, pre-initializes q_obj_atr from the q model, etc.
 }
 
 if __name__=='__main__':