Commit b5ad2ae0 authored by tgupta6

Merge branch 'master' of gitlab-beta.engr.illinois.edu:Vision/GenVQA

parents 4aa6f545 3bfdbb2f
@@ -59,7 +59,7 @@ def create_batch_generator(mode):
constants.num_negative_answers,
resnet_feat_dim=constants.resnet_feat_dim)
-index_generator = tftools.data.random(
+index_generator = tftools.data.sequential(
constants.answer_batch_size,
num_questions,
1,
@@ -153,6 +153,8 @@ class eval_mgr():
dict_entry['negative_answers'][answer] = \
str(eval_vars_dict['answer_score_' + str(j)][0,i+1])
+dict_entry['relevance_scores'] = eval_vars_dict['relevance_prob_' + str(j)].tolist()
+question_id = batch['question_id'][j]
pred_answer, pred_score = self.get_pred_answer(
[batch['positive_answer_unencoded'][j]] + \
@@ -168,7 +170,7 @@ class eval_mgr():
self.eval_data[str(question_id)] = dict_entry
-print dict_entry
+# print dict_entry
self.total += batch_size
@@ -259,6 +261,7 @@ if __name__=='__main__':
0,
0,
0,
+constants.answer_obj_atr_loss_wt,
resnet_feat_dim=constants.resnet_feat_dim,
training=False)
@@ -283,11 +286,13 @@ if __name__=='__main__':
for j in xrange(constants.answer_batch_size):
vars_to_eval_dict['answer_score_'+str(j)] = \
graph.answer_inference.answer_score[j]
+vars_to_eval_dict['relevance_prob_'+str(j)] = \
+    graph.relevance_inference.answer_region_prob[j]
print 'Creating evaluation manager...'
evaluator = eval_mgr(
constants.answer_eval_data_json,
-constants.answer_results_json)
+constants.answer_eval_results_json)
print 'Start training...'
eval(
@@ -245,6 +245,7 @@ def eval_model(model_to_eval, results_json):
0,
0,
0,
+constants.answer_obj_atr_loss_wt,
resnet_feat_dim=constants.resnet_feat_dim,
training=False)
@@ -159,13 +159,13 @@ num_test_questions = 0
# Answer classifier training params
answer_batch_size = 50
-answer_num_epochs = 4
+answer_num_epochs = 6
answer_offset = 0
answer_obj_atr_loss_wt = 0.1
answer_regularization_coeff = 1e-5
answer_queue_size = 500
answer_embedding_dim = 600
-answer_lr = 1e-3
+answer_lr = 1e-4
answer_log_every_n_iter = 500
answer_output_dir = os.path.join(
global_experiment_dir,
@@ -185,12 +185,12 @@ answer_model = os.path.join(
num_regions_with_labels = 100
# Answer fine tune params
-answer_fine_tune_from_iter = 17000
+answer_fine_tune_from_iter = 19500
answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter)
# Answer eval params
answer_eval_on = 'val'
-answer_model_to_eval = answer_model + '-13000'
+answer_model_to_eval = answer_model + '-39000'
answer_eval_data_json = os.path.join(
answer_output_dir,
from PIL import Image
+import matplotlib.pyplot as plt
import numpy as np
import math
import pdb
@@ -40,6 +41,11 @@ def imshow(np_im):
im.show()
+def imshow2(np_im):
+    imgplot = plt.imshow(np_im)
+    plt.show()
+    return imgplot
def imwrite(np_im, filename):
"""
@@ -70,59 +70,6 @@ class RegionRelevanceInference():
self.answer_region_scores[j],
'region_relevance_prob')
# class RegionRelevanceInference():
# def __init__(
# self,
# object_feat,
# attribute_feat,
# question_bins,
# answers):
# self.object_feat = object_feat
# self.attribute_feat = attribute_feat
# self.question_bins = question_bins
# self.answers = answers
# with tf.variable_scope('region_relevance_graph'):
# # Compute dot product with question bins
# self.question_object_scores = tf.matmul(
# self.question_bins,
# tf.transpose(self.object_feat))
# self.question_attribute_scores = tf.matmul(
# self.question_bins,
# tf.transpose(self.attribute_feat))
# self.question_object_scores = tf.reduce_mean(
# self.question_object_scores,
# 0,
# keep_dims = True)
# self.question_attribute_scores = tf.reduce_mean(
# self.question_attribute_scores,
# 0,
# keep_dims = True)
# # Compute dot product with answers
# self.answer_object_scores = tf.matmul(
# self.answers,
# tf.transpose(self.object_feat))
# self.answer_attribute_scores = tf.matmul(
# self.answers,
# tf.transpose(self.attribute_feat))
# # Computer final scores
# self.answer_region_scores = \
# tf.square(self.question_object_scores) + \
# tf.square(self.question_attribute_scores) + \
# tf.square(self.answer_object_scores) + \
# tf.square(self.answer_attribute_scores)
# self.answer_region_prob = tf.nn.softmax(
# self.answer_region_scores,
# 'region_relevance_prob')
class HtmlWriter():
    def __init__(self, filename):
        self.filename = filename
        self.html_file = open(self.filename, 'w')
        self.html_file.write("""<!DOCTYPE html>\n<html>\n<body>\n<table border="1" style="width:100%"> \n""")

    def add_element(self, col_dict):
        self.html_file.write(' <tr>\n')
        for key in range(len(col_dict)):
            self.html_file.write(""" <td>{}</td>\n""".format(col_dict[key]))
        self.html_file.write(' </tr>\n')

    def image_tag(self, image_path, height, width):
        return """<img src="{}" alt="IMAGE NOT FOUND!" height={} width={}>""".format(image_path, height, width)

    def close_file(self):
        self.html_file.write('</table>\n</body>\n</html>')
        self.html_file.close()
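
A minimal usage sketch of the HtmlWriter class above; the output path and cell contents are hypothetical:

    # Hypothetical example: write a two-row table with one image cell.
    writer = HtmlWriter('/tmp/index.html')
    writer.add_element({0: 'Question', 1: 'Answer'})
    writer.add_element({0: 'What color is the cat?',
                        1: writer.image_tag('cat.jpg', 256, 256)})
    writer.close_file()

Note that add_element expects integer keys 0..N-1, so the columns appear in key order.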
import ujson
import os
import csv
import numpy as np
from matplotlib import cm
import image_io
from html_writer import HtmlWriter
import pdb
class RelevanceVisualizer():
    def __init__(
            self,
            eval_data_json,
            anno_data_json,
            image_dir,
            region_dir,
            output_dir,
            data_type):
        self.image_dir = image_dir
        self.region_dir = region_dir
        self.output_dir = output_dir
        self.data_type = data_type
        self.eval_data = self.read_json_file(eval_data_json)
        self.anno_data = self.read_json_file(anno_data_json)
        self.html_filename = os.path.join(output_dir, 'index.html')
        self.html_writer = HtmlWriter(self.html_filename)

    def read_json_file(self, filename):
        print 'Reading {} ...'.format(filename)
        with open(filename, 'r') as file:
            return ujson.load(file)

    def get_image_name(self, qid):
        image_id = self.anno_data[qid]['image_id']
        image_name = 'COCO_' + self.data_type + '_' + str(image_id).zfill(12)
        image_path = os.path.join(self.image_dir, image_name + '.jpg')
        return image_name, image_path
    def get_image(self, key):
        image_name, image_path = self.get_image_name(key)
        im = image_io.imread(image_path)
        return im

    def get_bboxes(self, key):
        image_name, image_path = self.get_image_name(key)
        bbox_dir = os.path.join(self.region_dir, image_name)
        bbox_csv = os.path.join(bbox_dir, 'edge_boxes.csv')
        bboxes = []
        with open(bbox_csv, 'r') as csvfile:
            bbox_reader = csv.DictReader(
                csvfile,
                delimiter=',',
                fieldnames=['x', 'y', 'w', 'h', 'score'])
            for bbox in bbox_reader:
                bboxes.append(bbox)
        return bboxes

    def get_pos_ans_rel_scores(self, key):
        ans, score = self.eval_data[key]['positive_answer'].items()[0]
        return ans, float(score), self.eval_data[key]['relevance_scores'][0]

    def get_pred_ans_rel_scores(self, key):
        ans, score = self.eval_data[key]['positive_answer'].items()[0]
        score = float(score)
        pred_ans_score = (ans, score)
        pred_ans_id = 0
        count = 1
        for ans, score in self.eval_data[key]['negative_answers'].items():
            score = float(score)
            if score > pred_ans_score[1]:
                pred_ans_score = (ans, score)
                pred_ans_id = count
            count += 1
        return pred_ans_score[0], pred_ans_score[1], \
            self.eval_data[key]['relevance_scores'][pred_ans_id]

    def get_box_score_pairs(self, bboxes, scores):
        pairs = []
        for i, bbox in enumerate(bboxes):
            pairs.append((bbox, scores[i]))
        return pairs
    def create_relevance_map(self, key, mode='pos'):
        image_name, image_path = self.get_image_name(key)
        im = image_io.imread(image_path)
        if len(im.shape) == 2:
            # Replicate grayscale images across 3 channels
            im_h, im_w = im.shape
            im_ = im
            im = np.zeros([im_h, im_w, 3])
            for i in xrange(3):
                im[:, :, i] = im_
        bboxes = self.get_bboxes(key)
        if mode == 'pos':
            ans, ans_score, rel_scores = self.get_pos_ans_rel_scores(key)
        elif mode == 'pred':
            ans, ans_score, rel_scores = self.get_pred_ans_rel_scores(key)
        else:
            raise ValueError("mode can only take values {'pred', 'pos'}")
        # Max-combine score-weighted Gaussians, one per region proposal
        box_score_pairs = self.get_box_score_pairs(bboxes, rel_scores)
        rel_map = np.zeros(im.shape[0:2])
        for box, score in box_score_pairs:
            gauss_map = self.make_gaussian(box, im.shape[0:2])
            rel_map = np.maximum(rel_map, score*gauss_map)
        rel_map_ = cm.jet(np.uint8(rel_map*255))[:, :, 0:3]*255
        im_rel_map = np.uint8(0.3*im + 0.7*rel_map_)
        return rel_map, im_rel_map, ans, ans_score
    def make_gaussian(self, box, im_size):
        im_h, im_w = im_size
        x = np.arange(im_w)
        y = np.arange(im_h)
        xx, yy = np.meshgrid(x, y)
        sigma_x = float(box['w'])/4.0
        sigma_y = float(box['h'])/4.0
        cx = float(box['x']) + float(box['w'])/2.0
        cy = float(box['y']) + float(box['h'])/2.0
        g = np.exp(-((xx-cx)**2/(2*sigma_x**2)) - ((yy-cy)**2/(2*sigma_y**2)))
        return g
    def write_html(self):
        col_dict = {
            0: 'Question',
            1: 'Pos. Answer',
            2: 'Pos. Relevance',
            3: 'Pred. Answer',
            4: 'Pred. Relevance',
        }
        self.html_writer.add_element(col_dict)
        for qid in self.eval_data.keys():
            question = self.anno_data[qid]['question']
            pred_rel, pred_im_rel, pred_ans, pred_score = self.create_relevance_map(
                qid,
                mode='pred')
            pos_rel, pos_im_rel, pos_ans, pos_score = self.create_relevance_map(
                qid,
                mode='pos')
            if np.max(pred_rel) < 0.5:
                continue
            pred_im_name = 'pred_rel_' + qid + '.jpg'
            pos_im_name = 'pos_rel_' + qid + '.jpg'
            pred_rel_filename = os.path.join(self.output_dir, pred_im_name)
            pos_rel_filename = os.path.join(self.output_dir, pos_im_name)
            image_io.imwrite(pred_im_rel, pred_rel_filename)
            image_io.imwrite(pos_im_rel, pos_rel_filename)
            im_h, im_w = pred_rel.shape[0:2]
            col_dict = {
                0: question,
                1: pos_ans + ': ' + str(pos_score),
                2: self.html_writer.image_tag(pos_im_name, im_h, im_w),
                3: pred_ans + ': ' + str(pred_score),
                4: self.html_writer.image_tag(pred_im_name, im_h, im_w),
            }
            self.html_writer.add_element(col_dict)
        self.html_writer.close_file()
if __name__ == '__main__':
    data_type = 'val2014'
    data_dir = '/home/ssd/VQA'
    image_dir = os.path.join(data_dir, data_type)
    region_dir = os.path.join(
        data_dir,
        data_type + '_cropped_large')
    anno_data_json = os.path.join(
        data_dir,
        'mscoco_val2014_annotations_with_parsed_questions.json')
    exp_dir = '/home/tanmay/Code/GenVQA/Exp_Results/VQA/' + \
        'QA_explicit_dot_joint_training_pretrained_same_lr/'
    eval_data_json = os.path.join(
        exp_dir,
        'answer_classifiers/eval_val_data.json')
    output_dir = os.path.join(exp_dir, 'qual_results2')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    rel_vis = RelevanceVisualizer(
        eval_data_json,
        anno_data_json,
        image_dir,
        region_dir,
        output_dir,
        data_type)
    rel_vis.write_html()
# key = '5289770'
# keys = rel_vis.eval_data.keys()
# for key in keys:
# question = rel_vis.anno_data[key]['question']
# answer = rel_vis.anno_data[key]['multiple_choice_answer']
# print 'Q: ' + question
# print 'GT A: ' + answer
# _, rel, ans, score = rel_vis.create_relevance_map(key,mode='pred')
# print 'Pred A: ' + ans
# imgplot = image_io.imshow2(rel)
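
A minimal sketch of inspecting a single question id interactively, along the lines of the commented-out snippet above; it assumes the paths configured in __main__ exist and rel_vis has been constructed:

    # Hypothetical interactive use: show the predicted-answer relevance map
    # for the first question id in the evaluation data.
    qid = rel_vis.eval_data.keys()[0]
    print 'Q: ' + rel_vis.anno_data[qid]['question']
    rel_map, im_rel_map, ans, score = rel_vis.create_relevance_map(qid, mode='pred')
    print 'Pred A: ' + ans + ' (' + str(score) + ')'
    image_io.imshow2(im_rel_map)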