diff --git a/answer_classifier_cached_features/eval.py b/answer_classifier_cached_features/eval.py
index e5b2a4ec2205b80bbe0cabb80aca9dd3ec9ced1f..0f6b0debfcfe79bd6bbf2d01b12dc6f032831997 100644
--- a/answer_classifier_cached_features/eval.py
+++ b/answer_classifier_cached_features/eval.py
@@ -59,7 +59,7 @@ def create_batch_generator(mode):
         constants.num_negative_answers,
         resnet_feat_dim=constants.resnet_feat_dim)
 
-    index_generator = tftools.data.random(
+    index_generator = tftools.data.sequential(
         constants.answer_batch_size,
         num_questions,
         1,
@@ -153,6 +153,8 @@ class eval_mgr():
                 dict_entry['negative_answers'][answer] = \
                     str(eval_vars_dict['answer_score_' + str(j)][0,i+1])
 
+            dict_entry['relevance_scores'] = eval_vars_dict['relevance_prob_' + str(j)].tolist()
+
             question_id = batch['question_id'][j]
             pred_answer, pred_score = self.get_pred_answer(
                 [batch['positive_answer_unencoded'][j]] + \
@@ -168,7 +170,7 @@ class eval_mgr():
 
             self.eval_data[str(question_id)] = dict_entry
 
-            print dict_entry
+            # print dict_entry
 
         self.total += batch_size
 
@@ -259,6 +261,7 @@ if __name__=='__main__':
         0,
         0,
         0,
+        constants.answer_obj_atr_loss_wt,
         resnet_feat_dim=constants.resnet_feat_dim,
         training=False)
 
@@ -283,11 +286,13 @@ if __name__=='__main__':
     for j in xrange(constants.answer_batch_size):
         vars_to_eval_dict['answer_score_'+str(j)] = \
             graph.answer_inference.answer_score[j]
+        vars_to_eval_dict['relevance_prob_'+str(j)] = \
+            graph.relevance_inference.answer_region_prob[j]
 
     print 'Creating evaluation manager...'
     evaluator = eval_mgr(
         constants.answer_eval_data_json,
-        constants.answer_results_json)
+        constants.answer_eval_results_json)
 
     print 'Start training...'
     eval(
diff --git a/constants_crunchy.py b/constants_crunchy.py
index 9402eb2b6f3c846ecfa06ac9359d986c99189564..1e8cb93cb4fac07777b10da94d2cbdc8b39df16c 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -190,7 +190,7 @@ answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter)
 
 # Answer eval params
 answer_eval_on = 'val'
-answer_model_to_eval = answer_model + '-49500'
+answer_model_to_eval = answer_model + '-39000'
 
 answer_eval_data_json = os.path.join(
     answer_output_dir,
diff --git a/image_io.py b/image_io.py
index 578b8d91b551c1e967dfba03b8c2f8f2fc8dc3a8..3311c58af9aef094137821a76b68b4b7e8430eaa 100644
--- a/image_io.py
+++ b/image_io.py
@@ -1,4 +1,5 @@
 from PIL import Image
+import matplotlib.pyplot as plt
 import numpy as np
 import math
 import pdb
@@ -40,6 +41,11 @@ def imshow(np_im):
     im.show()
 
 
+def imshow2(np_im):
+    imgplot = plt.imshow(np_im)
+    plt.show()
+    return imgplot
+
 
 def imwrite(np_im, filename):
     """
diff --git a/region_relevance_network/inference.py b/region_relevance_network/inference.py
index 40f846e3a10a5413be948187bf7ef11698943b65..bb11c280256b3b6bb7578a79fa6458d2d5a25da5 100644
--- a/region_relevance_network/inference.py
+++ b/region_relevance_network/inference.py
@@ -70,59 +70,6 @@ class RegionRelevanceInference():
                 self.answer_region_scores[j],
                 'region_relevance_prob')
 
-
-# class RegionRelevanceInference():
-#     def __init__(
-#             self,
-#             object_feat,
-#             attribute_feat,
-#             question_bins,
-#             answers):
-
-#         self.object_feat = object_feat
-#         self.attribute_feat = attribute_feat
-#         self.question_bins = question_bins
-#         self.answers = answers
-
-#         with tf.variable_scope('region_relevance_graph'):
-#             # Compute dot product with question bins
-#             self.question_object_scores = tf.matmul(
-#                 self.question_bins,
-#                 tf.transpose(self.object_feat))
-
-#             self.question_attribute_scores = tf.matmul(
-#                 self.question_bins,
-#                 tf.transpose(self.attribute_feat))
-
-#             self.question_object_scores = tf.reduce_mean(
-#                 self.question_object_scores,
-#                 0,
-#                 keep_dims = True)
-
-#             self.question_attribute_scores = tf.reduce_mean(
-#                 self.question_attribute_scores,
-#                 0,
-#                 keep_dims = True)
-
-#             # Compute dot product with answers
-#             self.answer_object_scores = tf.matmul(
-#                 self.answers,
-#                 tf.transpose(self.object_feat))
-
-#             self.answer_attribute_scores = tf.matmul(
-#                 self.answers,
-#                 tf.transpose(self.attribute_feat))
-
-#             # Computer final scores
-#             self.answer_region_scores = \
-#                 tf.square(self.question_object_scores) + \
-#                 tf.square(self.question_attribute_scores) + \
-#                 tf.square(self.answer_object_scores) + \
-#                 tf.square(self.answer_attribute_scores)
-
-#             self.answer_region_prob = tf.nn.softmax(
-#                 self.answer_region_scores,
-#                 'region_relevance_prob')
diff --git a/visual_util/__init__.py b/visual_util/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/visual_util/html_writer.py b/visual_util/html_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e6327eefff2f7d3bb4d29d5bc4aa63cad416167
--- /dev/null
+++ b/visual_util/html_writer.py
@@ -0,0 +1,18 @@
+class HtmlWriter():
+    def __init__(self, filename):
+        self.filename = filename
+        self.html_file = open(self.filename, 'w')
+        self.html_file.write("""<!DOCTYPE html>\n<html>\n<body>\n<table border="1" style="width:100%"> \n""")
+
+    def add_element(self, col_dict):
+        self.html_file.write('    <tr>\n')
+        for key in range(len(col_dict)):
+            self.html_file.write("""    <td>{}</td>\n""".format(col_dict[key]))
+        self.html_file.write('    </tr>\n')
+
+    def image_tag(self, image_path, height, width):
+        return """<img src="{}" alt="IMAGE NOT FOUND!" height={} width={}>""".format(image_path, height, width)
+
+    def close_file(self):
+        self.html_file.write('</table>\n</body>\n</html>')
+        self.html_file.close()
diff --git a/visual_util/visualize_relevance.py b/visual_util/visualize_relevance.py
new file mode 100644
index 0000000000000000000000000000000000000000..521123b1ead4452b93db38989229567076ddc73a
--- /dev/null
+++ b/visual_util/visualize_relevance.py
@@ -0,0 +1,213 @@
+import ujson
+import os
+import csv
+import numpy as np
+from matplotlib import cm
+
+import image_io
+from html_writer import HtmlWriter
+import pdb
+
+class RelevanceVisualizer():
+    def __init__(
+            self,
+            eval_data_json,
+            anno_data_json,
+            image_dir,
+            region_dir,
+            output_dir,
+            data_type):
+        self.image_dir = image_dir
+        self.region_dir = region_dir
+        self.output_dir = output_dir
+        self.data_type = data_type
+        self.eval_data = self.read_json_file(eval_data_json)
+        self.anno_data = self.read_json_file(anno_data_json)
+        self.html_filename = os.path.join(output_dir, 'index.html')
+        self.html_writer = HtmlWriter(self.html_filename)
+
+    def read_json_file(self, filename):
+        print 'Reading {} ...'.format(filename)
+        with open(filename, 'r') as file:
+            return ujson.load(file)
+
+    def get_image_name(self, qid):
+        image_id = self.anno_data[qid]['image_id']
+        image_name = 'COCO_' + self.data_type + '_' + str(image_id).zfill(12)
+        image_path = os.path.join(self.image_dir, image_name + '.jpg')
+        return image_name, image_path
+
+    def get_image(self, key):
+        image_name, image_path = self.get_image_name(key)
+        im = image_io.imread(image_path)
+        return im
+
+    def get_bboxes(self, key):
+        image_name, image_path = self.get_image_name(key)
+        bbox_dir = os.path.join(self.region_dir, image_name)
+        bbox_csv = os.path.join(bbox_dir, 'edge_boxes.csv')
+        bboxes = []
+        with open(bbox_csv, 'r') as csvfile:
+            bbox_reader = csv.DictReader(
+                csvfile,
+                delimiter=',',
+                fieldnames=['x', 'y', 'w', 'h', 'score'])
+            for bbox in bbox_reader:
+                bboxes.append(bbox)
+        return bboxes
+
+    def get_pos_ans_rel_scores(self, key):
+        ans, score = self.eval_data[key]['positive_answer'].items()[0]
+        return ans, float(score), self.eval_data[key]['relevance_scores'][0]
+
+    def get_pred_ans_rel_scores(self, key):
+        ans, score = self.eval_data[key]['positive_answer'].items()[0]
+        score = float(score)
+        pred_ans_score = (ans, score)
+        pred_ans_id = 0
+        count = 1
+        for ans, score in self.eval_data[key]['negative_answers'].items():
+            score = float(score)
+            if score > pred_ans_score[1]:
+                pred_ans_score = (ans, score)
+                pred_ans_id = count
+            count += 1
+
+        return pred_ans_score[0], pred_ans_score[1], \
+            self.eval_data[key]['relevance_scores'][pred_ans_id]
+
+    def get_box_score_pairs(self, bboxes, scores):
+        pairs = []
+        for i, bbox in enumerate(bboxes):
+            pairs.append((bbox, scores[i]))
+        return pairs
+
+    def create_relevance_map(self, key, mode='pos'):
+        image_name, image_path = self.get_image_name(key)
+        im = image_io.imread(image_path)
+        if len(im.shape)==2:
+            im_h, im_w = im.shape
+            im_ = im
+            im = np.zeros([im_h, im_w, 3])
+            for i in xrange(3):
+                im[:,:,i] = im_
+
+        bboxes = self.get_bboxes(key)
+        if mode=='pos':
+            ans, ans_score, rel_scores = self.get_pos_ans_rel_scores(key)
+        elif mode=='pred':
+            ans, ans_score, rel_scores = self.get_pred_ans_rel_scores(key)
+        else:
+            raise ValueError(
+                "mode can only take values {'pred' or 'pos'}")
+        box_score_pairs = self.get_box_score_pairs(bboxes, rel_scores)
+        rel_map = np.zeros(im.shape[0:2])
+        for box, score in box_score_pairs:
+            gauss_map = self.make_gaussian(box, im.shape[0:2])
+            rel_map = np.maximum(rel_map, score*gauss_map)
+        rel_map_ = cm.jet(np.uint8(rel_map*255))[:,:,0:3]*255
+        im_rel_map = np.uint8(0.3*im + 0.7*rel_map_)
+        return rel_map, im_rel_map, ans, ans_score
+
+    def make_gaussian(self, box, im_size):
+        im_h, im_w = im_size
+        x = np.arange(im_w)
+        y = np.arange(im_h)
+        xx, yy = np.meshgrid(x, y)
+        sigma_x = float(box['w'])/4.0
+        sigma_y = float(box['h'])/4.0
+        cx = float(box['x']) + float(box['w'])/2.0
+        cy = float(box['y']) + float(box['h'])/2.0
+        g = np.exp(-((xx-cx)**2/(2*sigma_x**2)) - ((yy-cy)**2/(2*sigma_y**2)))
+        return g
+
+    def write_html(self):
+        col_dict = {
+            0: 'Question',
+            1: 'Pos. Answer',
+            2: 'Pos. Relevance',
+            3: 'Pred. Answer',
+            4: 'Pred. Relevance',
+        }
+        self.html_writer.add_element(col_dict)
+        for qid in self.eval_data.keys():
+            question = self.anno_data[qid]['question']
+
+            pred_rel, pred_im_rel, pred_ans, pred_score = self.create_relevance_map(
+                qid,
+                mode='pred')
+            pos_rel, pos_im_rel, pos_ans, pos_score = self.create_relevance_map(
+                qid,
+                mode='pos')
+
+            if np.max(pred_rel) < 0.5:
+                continue
+
+            pred_im_name = 'pred_rel_' + qid + '.jpg'
+            pos_im_name = 'pos_rel_' + qid + '.jpg'
+
+            pred_rel_filename = os.path.join(self.output_dir, pred_im_name)
+            pos_rel_filename = os.path.join(self.output_dir, pos_im_name)
+
+            image_io.imwrite(pred_im_rel, pred_rel_filename)
+            image_io.imwrite(pos_im_rel, pos_rel_filename)
+
+            im_h, im_w = pred_rel.shape[0:2]
+            col_dict = {
+                0 : question,
+                1 : pos_ans + ': ' + str(pos_score),
+                2 : self.html_writer.image_tag(pos_im_name, im_h, im_w),
+                3 : pred_ans + ': ' + str(pred_score),
+                4 : self.html_writer.image_tag(pred_im_name, im_h, im_w),
+            }
+
+            self.html_writer.add_element(col_dict)
+
+        self.html_writer.close_file()
+
+
+if __name__=='__main__':
+    data_type = 'val2014'
+    data_dir = '/home/ssd/VQA'
+
+    image_dir = os.path.join(data_dir, data_type)
+
+    region_dir = os.path.join(
+        data_dir,
+        data_type + '_cropped_large')
+
+    anno_data_json = os.path.join(
+        data_dir,
+        'mscoco_val2014_annotations_with_parsed_questions.json')
+
+    exp_dir = '/home/tanmay/Code/GenVQA/Exp_Results/VQA/' + \
+        'QA_explicit_dot_joint_training_pretrained_same_lr/'
+
+    eval_data_json = os.path.join(
+        exp_dir,
+        'answer_classifiers/eval_val_data.json')
+
+    output_dir = os.path.join(exp_dir, 'qual_results2')
+    if not os.path.exists(output_dir):
+        os.mkdir(output_dir)
+
+    rel_vis = RelevanceVisualizer(
+        eval_data_json,
+        anno_data_json,
+        image_dir,
+        region_dir,
+        output_dir,
+        data_type)
+
+    rel_vis.write_html()
+    # key = '5289770'
+    # keys = rel_vis.eval_data.keys()
+
+    # for key in keys:
+    #     question = rel_vis.anno_data[key]['question']
+    #     answer = rel_vis.anno_data[key]['multiple_choice_answer']
+    #     print 'Q: ' + question
+    #     print 'GT A: ' + answer
+    #     _, rel, ans, score = rel_vis.create_relevance_map(key,mode='pred')
+    #     print 'Pred A: ' + ans
+    #     imgplot = image_io.imshow2(rel)
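
Note on the relevance maps: create_relevance_map above composites one Gaussian bump per region proposal, scaled by that region's relevance probability, and maps the result through the jet colormap. What follows is a minimal standalone sketch of that step, assuming only numpy and matplotlib; the boxes and scores below are made up for illustration, whereas the patch reads real boxes from edge_boxes.csv and real scores from the relevance_scores field that eval.py now emits.

    import numpy as np
    from matplotlib import cm

    def make_gaussian(box, im_size):
        # Gaussian bump centered on the box; sigma is a quarter of each
        # side length, as in visual_util/visualize_relevance.py.
        im_h, im_w = im_size
        xx, yy = np.meshgrid(np.arange(im_w), np.arange(im_h))
        sigma_x = float(box['w'])/4.0
        sigma_y = float(box['h'])/4.0
        cx = float(box['x']) + float(box['w'])/2.0
        cy = float(box['y']) + float(box['h'])/2.0
        return np.exp(-((xx-cx)**2/(2*sigma_x**2)) - ((yy-cy)**2/(2*sigma_y**2)))

    # Hypothetical proposals and per-region relevance probabilities; the
    # real ones come from edge_boxes.csv and relevance_prob_<j> in eval.py.
    boxes = [{'x': 10, 'y': 20, 'w': 40, 'h': 30},
             {'x': 60, 'y': 50, 'w': 20, 'h': 20}]
    scores = [0.9, 0.4]

    rel_map = np.zeros((120, 160))
    for box, score in zip(boxes, scores):
        # Max-composite the score-weighted bumps, as create_relevance_map does.
        rel_map = np.maximum(rel_map, score*make_gaussian(box, rel_map.shape))

    # Map [0, 1] scores through jet to get an RGB heat map in [0, 255].
    heat = cm.jet(np.uint8(rel_map*255))[:, :, 0:3]*255
    print heat.shape, rel_map.max()

Max-compositing with np.maximum, rather than summing, keeps overlapping proposals from saturating the map, and the quarter-side sigma keeps most of each bump's mass inside its box.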