diff --git a/classifiers/answer_classifier/ans_data_io_helper.py b/classifiers/answer_classifier/ans_data_io_helper.py index b6b51eb123140dc5290ea4f82c4b2bea4428df62..959e70ed8c68f3430e9c16c7acf9fda58a5664d5 100644 --- a/classifiers/answer_classifier/ans_data_io_helper.py +++ b/classifiers/answer_classifier/ans_data_io_helper.py @@ -165,7 +165,6 @@ class batch_creator(): region_score = np.zeros(shape=[1,count]) partition = np.zeros(shape=[count]) parsed_q = dict() -# question_encodings = np.zeros(shape=[count, len(vocab)]) for i in xrange(batch_size): q_id = q_ids[i] @@ -179,16 +178,6 @@ class batch_creator(): gt_regions_for_image, False) - # question_encoding_tmp = np.zeros(shape=[1, len(vocab)]) - # for word in question[0:-1].split(): - # if word.lower() not in vocab: - # word = 'unk' - # question_encoding_tmp[0, vocab[word.lower()]] += 1 - - # question_len = np.sum(question_encoding_tmp) - # assert (not question_len==0) - # question_encoding_tmp /= question_len - for j in xrange(num_proposals): counter = j + i*num_proposals parsed_q[counter] = parsed_q_dict[q_id] @@ -200,8 +189,6 @@ class batch_creator(): region_score[0,counter] = proposal.score partition[counter] = i -# question_encodings[counter,:] = question_encoding_tmp - score_start_id = i*num_proposals region_score[0, score_start_id:score_start_id+num_proposals] /=\ np.sum(region_score[0,score_start_id @@ -209,9 +196,7 @@ class batch_creator(): return region_images, ans_labels, parsed_q, \ region_score, partition - # return region_images, ans_labels, question_encodings, \ - # region_score, partition - + def reshape_score(self, region_score): num_cols = num_proposals @@ -301,10 +286,11 @@ class FeedDictCreator(): class RelFeedDictCreator(FeedDictCreator): def __init__(self, region_images, parsed_q, - gt_region_scores, keep_prob, plholder_dict, vocab): + gt_region_scores, keep_prob, plholder_dict, vocab, is_train): FeedDictCreator.__init__(self, region_images, parsed_q, keep_prob, plholder_dict, vocab) 
self.feed_dict[plholder_dict['gt_scores']] = gt_region_scores + self.feed_dict[plholder_dict['is_train']] = is_train class AnsFeedDictCreator(FeedDictCreator): diff --git a/classifiers/answer_classifier/eval_ans_classifier_simple.py b/classifiers/answer_classifier/eval_ans_classifier_simple.py index d50ad5db3cc1229dcc94f67410f5660395f5105d..602d359f1cb6e504c2947c3bdb1fc62a5738f73b 100644 --- a/classifiers/answer_classifier/eval_ans_classifier_simple.py +++ b/classifiers/answer_classifier/eval_ans_classifier_simple.py @@ -10,6 +10,7 @@ import pdb import tensorflow as tf import tf_graph_creation_helper as graph_creator import ans_graph_creator +import rel_graph_creator import plot_helper as plotter import ans_data_io_helper as ans_io_helper import region_ranker.perfect_ranker as region_proposer @@ -134,11 +135,25 @@ def eval(eval_params): y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat) atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat') atr_feat = atr_feat_op.outputs[0] - pred_rel_score = graph_creator.rel_comp_graph(plholder_dict, - obj_feat, atr_feat, - y_pred_obj, y_pred_atr, - 'q_obj_atr_reg', - 1.0, len(vocab), batch_size) + # pred_rel_score = graph_creator.rel_comp_graph(plholder_dict, + # obj_feat, atr_feat, + # y_pred_obj, y_pred_atr, + # 'q_obj_atr_reg', + # 1.0, len(vocab), batch_size) + + rel_graph = rel_graph_creator.rel_graph_creator(plholder_dict, + 1.0, + obj_feat, + atr_feat, + y_pred_obj, + y_pred_atr, + len(vocab), + batch_size, + graph_creator.graph_config, + 'q_obj_atr_reg_explt', + False) + + pred_rel_score = rel_graph.rel_score ans_graph = ans_graph_creator.ans_graph_creator(plholder_dict, obj_feat, @@ -158,7 +173,8 @@ def eval(eval_params): pred_rel_score_vec = tf.reshape(pred_rel_score, [1, batch_size*ans_io_helper.num_proposals]) - y_avg = graph_creator.aggregate_y_pred(y_pred, region_score, + y_avg = graph_creator.aggregate_y_pred(y_pred, + region_score, batch_size, ans_io_helper.num_proposals, len(ans_vocab)) 
@@ -286,7 +302,7 @@ def create_html_file(outdir, test_anno_filename, regions_anno_filename, if __name__=='__main__': - mode = 'q_obj_atr' + mode = 'q_obj_atr_reg' model_num = 4 ans_classifier_eval_params = { 'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json', @@ -296,7 +312,7 @@ if __name__=='__main__': 'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images', 'image_regions_dir': '/mnt/ramdisk/image_regions', 'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin', - 'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9', + 'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt_At_End/rel_classifier_q_obj_atr_reg_explt-4', 'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin/ans_classifier_' + mode + '-' + str(model_num), 'mode' : mode, 'batch_size': 20, diff --git a/classifiers/answer_classifier/train_ans_classifier_simple.py b/classifiers/answer_classifier/train_ans_classifier_simple.py index b787ac53a99796b9eddd406a53989afd03fa36b5..79606761f72bdddff69bccd3863050c254d9079f 100644 --- a/classifiers/answer_classifier/train_ans_classifier_simple.py +++ b/classifiers/answer_classifier/train_ans_classifier_simple.py @@ -12,6 +12,7 @@ import object_classifiers.obj_data_io_helper as obj_data_loader import attribute_classifiers.atr_data_io_helper as atr_data_loader import tf_graph_creation_helper as graph_creator import ans_graph_creator +import rel_graph_creator import plot_helper as plotter import ans_data_io_helper as ans_io_helper import region_ranker.perfect_ranker as region_proposer @@ -101,12 +102,19 @@ def train(train_params): atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat') atr_feat = atr_feat_op.outputs[0] - pred_rel_score = graph_creator.rel_comp_graph(plholder_dict, - obj_feat, atr_feat, - y_pred_obj, y_pred_atr, - 'q_obj_atr_reg_explt', - 1.0, len(vocab), - batch_size) + rel_graph = 
rel_graph_creator.rel_graph_creator(plholder_dict, + 1.0, + obj_feat, + atr_feat, + y_pred_obj, + y_pred_atr, + len(vocab), + batch_size, + graph_creator.graph_config, + 'q_obj_atr_reg_explt', + False) + + pred_rel_score = rel_graph.rel_score # Restore rel, obj and attribute classifier parameters rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel') @@ -138,7 +146,7 @@ def train(train_params): [1, batch_size*ans_io_helper.num_proposals]) y_avg = graph_creator.aggregate_y_pred(y_pred, - pred_rel_score_vec, batch_size, + region_score, batch_size, ans_io_helper.num_proposals, len(ans_vocab)) @@ -164,8 +172,9 @@ def train(train_params): str(train_params['start_model'])) elif train_params['mode']=='q_obj_atr_reg': - partial_model = os.path.join(outdir, 'ans_classifier_q_obj_atr-' + \ - str(train_params['start_model'])) + # partial_model = os.path.join(outdir, 'ans_classifier_q_obj_atr-' + \ + # str(train_params['start_model'])) + partial_model = '' # Fine tune begining with a previous model if train_params['fine_tune']==True: @@ -208,7 +217,7 @@ def train(train_params): if train_params['mode']=='q': vars_to_init = ans_vars + optimizer_vars else: - vars_to_init = optimizer_vars + vars_to_init = ans_vars + optimizer_vars sess.run(tf.initialize_variables(vars_to_init)) diff --git a/classifiers/region_ranker/eval_rel_classifier_simple.py b/classifiers/region_ranker/eval_rel_classifier_simple.py new file mode 100644 index 0000000000000000000000000000000000000000..c7919fea328fd0211f287bda1c15b80b5b6e8086 --- /dev/null +++ b/classifiers/region_ranker/eval_rel_classifier_simple.py @@ -0,0 +1,118 @@ +import sys +import os +import json +import math +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +import numpy as np +import pdb +import tensorflow as tf +import answer_classifier.ans_data_io_helper as ans_io_helper +import region_ranker.perfect_ranker as region_proposer +#import region_ranker.train_rel_classifier as rel_trainer +import 
region_ranker.train_rel_classifier_simple as rel_trainer +import tf_graph_creation_helper as graph_creator +import rel_graph_creator +import plot_helper as plotter + +def eval(eval_params): + sess = tf.InteractiveSession() + train_anno_filename = eval_params['train_json'] + test_anno_filename = eval_params['test_json'] + parsed_q_filename = eval_params['parsed_q_json'] + regions_anno_filename = eval_params['regions_json'] + whole_image_dir = eval_params['image_dir'] + image_regions_dir = eval_params['image_regions_dir'] + outdir = eval_params['outdir'] + model_basedir = eval_params['model_basedir'] + model_number = eval_params['model_number'] + mode = eval_params['mode'] + batch_size = eval_params['batch_size'] + test_start_id = eval_params['test_start_id'] + test_set_size = eval_params['test_set_size'] + model = os.path.join(model_basedir, 'rel_classifier_' + mode + \ + '-' + str(model_number)) + if not os.path.exists(outdir): + os.mkdir(outdir) + + qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename) + qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename) + parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename) + region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename) + ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict() + vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train) + + + # Create graph + g = tf.get_default_graph() + plholder_dict = \ + graph_creator.placeholder_inputs_rel(ans_io_helper.num_proposals, + len(vocab), mode='gt') + image_regions = plholder_dict['image_regions'] + y = plholder_dict['gt_scores'] + keep_prob = plholder_dict['keep_prob'] + + y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0) + obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat') + obj_feat = obj_feat_op.outputs[0] + y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat) + atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat') + atr_feat = atr_feat_op.outputs[0] 
+ rel_graph = rel_graph_creator.rel_graph_creator(plholder_dict, + keep_prob, + obj_feat, + atr_feat, + y_pred_obj, + y_pred_atr, + len(vocab), + batch_size, + graph_creator.graph_config, + mode, + False) + y_pred = rel_graph.rel_score + + # Restore model + restorer = tf.train.Saver() + if os.path.exists(model): + restorer.restore(sess, model) + else: + print 'Failed to read model from file ' + model + + # Load mean image + mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \ + 'Obj_Classifier/mean_image.npy') + + # Batch creator + test_batch_creator = ans_io_helper.batch_creator(test_start_id, + test_start_id + + test_set_size - 1) + + # Test Recall + test_recall = rel_trainer.evaluate(y_pred, qa_anno_dict, + region_anno_dict, parsed_q_dict, + ans_vocab, vocab, + image_regions_dir, mean_image, + test_start_id, test_set_size, + batch_size, plholder_dict, + 75, 75, test_batch_creator,verbose=True) + + # html_dir = os.path.join(outdir,'rel_html') + # test_recall = rel_trainer.evaluate_with_vis(y_pred, + # qa_anno_dict, + # region_anno_dict, + # parsed_q_dict, + # ans_vocab, + # vocab, + # image_regions_dir, + # mean_image, + # test_start_id, + # test_set_size, + # batch_size, + # plholder_dict, + # 75, + # 75, + # test_batch_creator, + # html_dir, + # whole_image_dir, + # verbose=True) + print('Test Rec: ' + str(test_recall)) diff --git a/classifiers/region_ranker/perfect_ranker.py b/classifiers/region_ranker/perfect_ranker.py index 2dd92d5c86111b467378c1558520de572320bedc..40f185f5af071ec85b3d604c8dd663bc3eb3094e 100644 --- a/classifiers/region_ranker/perfect_ranker.py +++ b/classifiers/region_ranker/perfect_ranker.py @@ -51,7 +51,7 @@ def get_region_coords(img_height, img_width): print(region_coords) return region_coords, region_coords_ -def rank_regions(image, question, region_coords, region_coords_, +def rank_regions2(image, question, region_coords, region_coords_, gt_regions_for_image, crop=True): num_regions, _ = region_coords.shape @@ -83,6 +83,7 
@@ def rank_regions(image, question, region_coords, region_coords_, no_regions_flag = True else: for gt_region in gt_regions_for_image: + gt_x1, gt_y1, gt_x2, gt_y2 = gt_regions_for_image[gt_region] if gt_x1==x1_ and gt_x2==x2_ and gt_y1==y1_ and \ gt_y2==y2_ and gt_region in question: @@ -102,6 +103,107 @@ def rank_regions(image, question, region_coords, region_coords_, return regions +def rank_regions(image, question, region_coords, region_coords_, + gt_regions_for_image, crop=True): + + num_regions, _ = region_coords.shape + regions = dict() + coord_list = [] + no_regions_flag = False + if question is not None: + if 'How manys shapes' in question: + no_regions_flag = True + elif 'How many' in question: + split_question = question.split(" ") + gt_region = split_question[-1] + gt_region = gt_region[2:4] + if gt_region not in gt_regions_for_image: + no_regions_flag = True + else: + coord_list.append(gt_regions_for_image[gt_region]) + elif 'What color' in question: + split_question = question.split(" ") + gt_region = split_question[-1] + gt_region = gt_region[:-1] + if gt_region not in gt_regions_for_image: + no_regions_flag = True + else: + coord_list.append(gt_regions_for_image[gt_region]) + elif 'below' in question: + split_question = question.split(" ") + gt_region = " ".join(split_question[3:5]) + if gt_region not in gt_regions_for_image: + no_regions_flag = True + else: + coord_list.append(gt_regions_for_image[gt_region]) + gt_region = " ".join(split_question[7:9]) + gt_region = gt_region[:-1] + if gt_region not in gt_regions_for_image: + no_regions_flag = True + else: + coord_list.append(gt_regions_for_image[gt_region]) + gt_region = " ".join(split_question[3:9]) + gt_region = gt_region[:-1] + if gt_region not in gt_regions_for_image: + no_regions_flag = True + else: + coord_list.append(gt_regions_for_image[gt_region]) + elif 'Is there' in question: + split_question = question.split(" ") + gt_region = " ".join(split_question[3:5]) + gt_region = gt_region[:-1] 
+ if gt_region not in gt_regions_for_image: + no_regions_flag = True + else: + coord_list.append(gt_regions_for_image[gt_region]) + + num_gt_regions = len(coord_list) + for i in xrange(num_regions): + x1_ = region_coords_[i,0] + y1_ = region_coords_[i,1] + x2_ = region_coords_[i,2] + y2_ = region_coords_[i,3] + + x1 = region_coords[i,0] + y1 = region_coords[i,1] + x2 = region_coords[i,2] + y2 = region_coords[i,3] + + if crop: + cropped_image = image[y1-1:y2, x1-1:x2, :] + else: + cropped_image = None + + score = 0.0 + if no_regions_flag: + score = 1.0/num_regions + else: + for coord in coord_list: + gt_x1, gt_y1, gt_x2, gt_y2 = coord + if gt_x1==x1_ and gt_x2==x2_ and gt_y1==y1_ and gt_y2==y2_: + score = 1.0/num_gt_regions + break; + + regions[i] = region(image=cropped_image, score=score, + coord=region_coords[i,:]) + + return regions + +def get_rel_map(image, scores, region_coords): + num_regions = region_coords.shape[0] + h, w, c = image.shape + rel_map = np.zeros(shape=[h, w, num_regions], dtype=np.float32) + for i in xrange(num_regions): + x1 = region_coords[i,0] + y1 = region_coords[i,1] + x2 = region_coords[i,2] + y2 = region_coords[i,3] + rel_map[y1-1:y2, x1-1:x2, i] = scores[i] + + rel_map = rel_map.max(axis=2) + rel_map = 0.5 + 0.5*rel_map + return rel_map + if __name__=='__main__': image_dir = '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images/' diff --git a/classifiers/region_ranker/train_rel_classifier_simple.py b/classifiers/region_ranker/train_rel_classifier_simple.py index 6ae315b94099acbba0b66229ab17458d1aafe20e..114d3bc8dcf5b9f1d44b9f1b503c18cce34289c1 100644 --- a/classifiers/region_ranker/train_rel_classifier_simple.py +++ b/classifiers/region_ranker/train_rel_classifier_simple.py @@ -4,12 +4,14 @@ import json import math import matplotlib.pyplot as plt import matplotlib.image as mpimg +import scipy.misc import numpy as np import pdb import tensorflow as tf import answer_classifier.ans_data_io_helper as ans_io_helper import 
region_ranker.perfect_ranker as region_proposer import tf_graph_creation_helper as graph_creator +import rel_graph_creator import plot_helper as plotter val_start_id = 89645 @@ -39,6 +41,7 @@ def batch_recall(pred_scores, gt_scores, k): return batch_recall + def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict, ans_vocab, vocab, image_dir, mean_image, start_index, val_set_size, batch_size, plholder_dict, img_height, img_width, batch_creator, @@ -64,10 +67,10 @@ def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict, region_scores, 1.0, plholder_dict, - vocab).feed_dict + vocab, + False).feed_dict region_score_pred_eval = region_score_pred.eval(feed_dict) - print region_score_pred_eval recall_at_k += batch_recall(region_score_pred_eval, region_scores, -1) @@ -76,6 +79,92 @@ def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict, return recall_at_k +def evaluate_with_vis(region_score_pred, + qa_anno_dict, + region_anno_dict, + parsed_q_dict, + ans_vocab, + vocab, + image_dir, + mean_image, + start_index, + val_set_size, + batch_size, + plholder_dict, + img_height, + img_width, + batch_creator, + html_dir, + whole_image_dir, + verbose=False): + + if not os.path.exists(html_dir): + os.mkdir(html_dir) + + html_filename = os.path.join(html_dir,'index.html') + html_writer = ans_io_helper.html_ans_table_writer(html_filename) + + recall_at_k = 0 + max_iter = int(math.floor(val_set_size/batch_size)) + + for i in xrange(max_iter): + if verbose==True: + print('Iter: ' + str(i+1) + '/' + str(max_iter)) + region_images, ans_labels, parsed_q, \ + region_scores_vec, partition= batch_creator \ + .ans_mini_batch_loader(qa_anno_dict, region_anno_dict, + ans_vocab, vocab, image_dir, mean_image, + start_index+i*batch_size, batch_size, + parsed_q_dict, + img_height, img_width, 3) + region_scores = batch_creator.reshape_score(region_scores_vec) + + feed_dict = ans_io_helper \ + .RelFeedDictCreator(region_images, + 
parsed_q, + region_scores, + 1.0, + plholder_dict, + vocab, + False).feed_dict + + region_score_pred_eval = region_score_pred.eval(feed_dict) + print region_score_pred_eval.shape + recall_at_k += batch_recall(region_score_pred_eval, + region_scores, -1) + + q_ids = batch_creator.qa_index(start_index+i*batch_size, batch_size) + for j in xrange(batch_size): + q_id = q_ids[j] + image_id = qa_anno_dict[q_id].image_id + question = qa_anno_dict[q_id].question + answer = qa_anno_dict[q_id].answer + image = mpimg.imread(os.path.join(whole_image_dir, + '{}.jpg'.format(image_id))) + rel_map = region_proposer.get_rel_map(image, + region_score_pred_eval[j,:], + ans_io_helper.region_coords_) + rel_map_stacked = np.dstack((rel_map, rel_map, rel_map)) + image = np.multiply(image, rel_map_stacked) + \ + np.multiply(0*image+255, 1-rel_map_stacked) + image_filename = os.path.join(html_dir, + str(image_id) + '_' + \ + str(q_id) + '.jpg') + scipy.misc.imsave(image_filename, image) + col_dict = { + 0: q_id, + 1: question, + 2: answer, + 3: html_writer.image_tag(str(image_id) + '_' + \ + str(q_id) + '.jpg', 50, 50), + } + html_writer.add_element(col_dict) + + html_writer.close_file() + recall_at_k /= max_iter + + return recall_at_k + def train(train_params): sess = tf.InteractiveSession() @@ -125,74 +214,43 @@ def train(train_params): y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat) atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat') atr_feat = atr_feat_op.outputs[0] - y_pred = graph_creator.rel_comp_graph(plholder_dict, - obj_feat, atr_feat, - y_pred_obj, y_pred_atr, mode, - keep_prob, len(vocab), batch_size) + rel_graph = rel_graph_creator.rel_graph_creator(plholder_dict, + keep_prob, + obj_feat, + atr_feat, + y_pred_obj, + y_pred_atr, + len(vocab), + batch_size, + graph_creator.graph_config, + mode, + True) + + y_pred = rel_graph.rel_score accuracy = graph_creator.evaluation(y, y_pred) cross_entropy = graph_creator.loss(y, y_pred) - # Collect variables 
- params_varnames = [ - 'rel/word_embed/word_vecs', - 'rel/conv1/W', - 'rel/conv2/W', - 'rel/conv1/b', - 'rel/conv2/b', - 'rel/fc1/W_reg', - 'rel/fc1/W_q', - 'rel/fc1/W_obj', - 'rel/fc1/W_atr', - 'rel/fc1/W_explt', - 'rel/fc1/b', - 'rel/fc2/W', - 'rel/fc2/b', - ] - - vars_dict = graph_creator.get_list_of_variables(params_varnames) - - # parameters grouped together - rel_word_params = [ - vars_dict['rel/word_embed/word_vecs'], - ] - - rel_conv_params = [ - vars_dict['rel/conv1/W'], - vars_dict['rel/conv2/W'], - ] - - rel_fc_params = [ - vars_dict['rel/fc1/W_reg'], - vars_dict['rel/fc1/W_q'], - vars_dict['rel/fc1/W_obj'], - vars_dict['rel/fc1/W_atr'], - vars_dict['rel/fc1/W_explt'], - vars_dict['rel/fc2/W'], - ] - # Regularization - regularizer_rel_word_vecs = graph_creator.regularize_params(rel_word_params) - regularizer_rel_filters = graph_creator.regularize_params(rel_conv_params) - regularizer_rel_fcs = graph_creator.regularize_params(rel_fc_params) - - total_loss = cross_entropy + \ - 1e-4 * regularizer_rel_word_vecs + \ - 1e-3 * regularizer_rel_filters + \ - 1e-4 * regularizer_rel_fcs + vars_to_regularize = tf.get_collection('regularize') + + total_loss = cross_entropy + for var in vars_to_regularize: + print var.name + total_loss += 1e-4 * tf.nn.l2_loss(var) # Restore weights obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj') atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel') - vars_to_save = rel_vars + atr_vars + obj_vars - vars_to_train = rel_vars[:] - pretrained_vars = atr_vars + obj_vars + obj_atr_restorer = tf.train.Saver(obj_vars + atr_vars) + obj_atr_restorer.restore(sess, obj_atr_model) # Model to save and restore weights from - model_saver = tf.train.Saver(vars_to_save) + model_saver = tf.train.Saver() + if train_params['fine_tune']==True: pretrained_model = os.path.join(outdir, 'rel_classifier_' + mode +'-'+ \ @@ -200,38 +258,29 @@ def train(train_params): 
assert (os.path.exists(pretrained_model)), \ 'Pretrained model does not exist' model_saver.restore(sess, pretrained_model) - pretrained_vars = vars_to_save[:] start_epoch = train_params['start_model'] + 1 else: assert (os.path.exists(obj_atr_model)), \ 'Obj_Atr model does not exist' - obj_atr_restorer = tf.train.Saver(pretrained_vars) - obj_atr_restorer.restore(sess, obj_atr_model) start_epoch = 0 # Attach optimization ops + all_vars_without_optim = tf.all_variables() + vars_to_train = rel_vars train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \ .minimize(total_loss, var_list=vars_to_train) # Initialize uninitialized vars - all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) - vars_to_init = [var for var in all_vars if var not in pretrained_vars] + all_vars = tf.all_variables() + optimizer_vars = [var for var in all_vars if var not in + all_vars_without_optim] + + if train_params['fine_tune']: + vars_to_init = optimizer_vars + else: + vars_to_init = optimizer_vars + rel_vars sess.run(tf.initialize_variables(vars_to_init)) - print('-----------------') - print 'Variables to train:' - print [var.name for var in vars_to_train] - print('-----------------') - print 'Pretrained variables:' - print [var.name for var in pretrained_vars] - print('-----------------') - print 'Variables to initialize:' - print [var.name for var in vars_to_init] - print('-----------------') - print 'Variables to save' - print [var.name for var in vars_to_save] - print('-----------------') - # Load mean image mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \ 'Obj_Classifier/mean_image.npy') @@ -287,7 +336,8 @@ def train(train_params): train_region_score, 0.5, plholder_dict, - vocab).feed_dict + vocab, + True).feed_dict _, current_train_batch_acc, y_pred_eval, loss_eval = \ sess.run([train_step, accuracy, y_pred, total_loss], diff --git a/classifiers/rel_graph_creator.py b/classifiers/rel_graph_creator.py index 
91fe8e5aa60c5087191be0e82b61b8db99ef2d20..8ddaa2ca797d033cbdd48bc0b798ca1e82215ee2 100644 --- a/classifiers/rel_graph_creator.py +++ b/classifiers/rel_graph_creator.py @@ -4,19 +4,18 @@ import pdb import tensorflow as tf import answer_classifier.ans_data_io_helper as ans_io_helper from tf_graph_creation_helper import weight_variable, bias_variable, \ - q_bin_embed_graph, conv2d, max_pool_2x2, batchnorm + q_bin_embed_graph, conv2d, max_pool_2x2, batchnorm, explicit_feat_graph -class ans_graph_creator(): +class rel_graph_creator(): def __init__(self, plholder_dict, + keep_prob, obj_feat, atr_feat, obj_prob, atr_prob, - vocab, - inv_vocab, - ans_vocab, + vocab_size, batch_size, graph_config, mode='q_obj_atr', @@ -24,11 +23,10 @@ class ans_graph_creator(): self.mode = mode self.is_train = plholder_dict['is_train'] - self.keep_prob = plholder_dict['keep_prob'] + self.keep_prob = keep_prob image_regions = plholder_dict['image_regions'] - vocab_size = len(vocab) - with tf.name_scope('ans') as ans_graph: + with tf.name_scope('rel') as rel_graph: # Word Vectors word_vecs = self.create_word_vecs(vocab_size, graph_config['word_vec_dim']) @@ -36,22 +34,28 @@ class ans_graph_creator(): # Feature Computations q_feat = self.add_q_feat_comp_layer(word_vecs, plholder_dict) reg_feat = self.add_reg_feat_comp_layer(image_regions) + explt_feat = self.add_explt_feat_comp_layer(obj_prob, + atr_prob, + plholder_dict) # Feature Projections (with batch norm) feat_proj_dim = graph_config['joint_embed_dim'] proj_feat = dict() - proj_feat['q'] = self.fc_layer(q_feat, feat_proj_dim, - 'q_feat_proj_layer') - - proj_feat['reg'] = self.fc_layer(reg_feat, feat_proj_dim, - 'reg_feat_proj_layer') + proj_feat['q'] = self.feat_proj_layer(q_feat, feat_proj_dim, + 'q_feat_proj_layer') + + proj_feat['reg'] = self.feat_proj_layer(reg_feat, feat_proj_dim, + 'reg_feat_proj_layer') - proj_feat['obj'] = self.fc_layer(obj_feat, feat_proj_dim, - 'obj_feat_proj_layer') + proj_feat['obj'] = 
self.feat_proj_layer(obj_feat, feat_proj_dim, + 'obj_feat_proj_layer') - proj_feat['atr'] = self.fc_layer(atr_feat, feat_proj_dim, - 'atr_feat_proj_layer') + proj_feat['atr'] = self.feat_proj_layer(atr_feat, feat_proj_dim, + 'atr_feat_proj_layer') + + proj_feat['explt'] = self.feat_proj_layer(explt_feat, feat_proj_dim, + 'explt_feat_proj_layer') # Feature Combination coeffs = self.mixing_coeffs() @@ -62,16 +66,12 @@ class ans_graph_creator(): for feat_type, feat in proj_feat.items(): comb_feat = comb_feat + feat * coeffs[feat_type] - # Answer feature - ans_feat = self.compute_ans_feat(word_vecs, vocab, ans_vocab) - - # Proj answer - proj_ans_feat = self.fc_layer(ans_feat, feat_proj_dim, - 'ans_feat_proj_layer') - - # Compute Cosine Distance - self.cosine_dist = self.compute_cosine_dist(comb_feat, - proj_ans_feat) + bn_comb_feat = batchnorm(comb_feat, None, self.is_train) + + # Softmax scores + self.rel_score = self.softmax_layer(tf.nn.relu(bn_comb_feat), + batch_size, + ans_io_helper.num_proposals) def create_word_vecs(self, vocab_size, word_vec_dim): word_vecs = weight_variable([vocab_size, @@ -124,49 +124,68 @@ class ans_graph_creator(): return reg_feat - def fc_layer(self, feat, proj_dim, name_scope): + def add_explt_feat_comp_layer(self, obj_prob, atr_prob, plholder_dict): + with tf.name_scope('explicit_feat') as expl_feat: + explt_feat_list = [] + for bin_num in xrange(4): + bin_name = 'bin'+ str(bin_num) + explt_feat_list.append(explicit_feat_graph(bin_name, obj_prob, + 'obj', plholder_dict)) + explt_feat_list.append(explicit_feat_graph(bin_name, atr_prob, + 'atr', plholder_dict)) + + concat_explt_feat = tf.concat(1, explt_feat_list, + name = 'concat_explt_feat') + + return concat_explt_feat + + def feat_proj_layer(self, feat, proj_dim, name_scope): with tf.name_scope(name_scope) as fc_layer: feat_dim = feat.get_shape()[1].value W1 = weight_variable([feat_dim, proj_dim]) b1 = bias_variable([proj_dim]) - proj_feat = tf.add(tf.matmul(feat, W1), b1) - 
bn_proj_feat = batchnorm(proj_feat, None, self.is_train) + proj_feat1 = tf.add(tf.matmul(feat, W1), b1) + bn_proj_feat1 = batchnorm(proj_feat1, None, self.is_train) W2 = weight_variable([proj_dim, proj_dim]) b2 = bias_variable([proj_dim]) - bn_proj_feat = tf.add(tf.matmul(tf.nn.relu(bn_proj_feat), W2), b2) + bn_proj_feat2 = tf.add(tf.matmul(tf.nn.relu(bn_proj_feat1), W2), b2) + tf.add_to_collection('regularize', W1) tf.add_to_collection('regularize', W2) - return bn_proj_feat + return bn_proj_feat2 def mixing_coeffs(self): - feat_types = ['q', 'obj', 'atr', 'reg'] + feat_types = ['q', 'obj', 'atr', 'reg', 'explt'] coeffs = dict() count = 0; for feat_type in feat_types: if feat_type in self.mode: - coeffs[feat_type] = 1.0 - count += 1 + if feat_type is 'explt': + coeffs[feat_type] = 1.0 + count += 1 + else: + coeffs[feat_type] = 1.0 + count += 1 else: coeffs[feat_type] = 0.0 coeffs = {k: v/count for k, v in coeffs.items()} return coeffs - def compute_ans_feat(self, word_vecs, vocab, ans_vocab): - ans_vocab_size = len(ans_vocab) - inv_ans_vocab = {v:k for k, v in ans_vocab.items()} - ans_in_vocab_ids_list = [] - for i in xrange(ans_vocab_size): - ans_in_vocab_ids_list.append(vocab[inv_ans_vocab[i]]) - - ans_in_vocab_ids_tensor = tf.constant(ans_in_vocab_ids_list, - dtype=tf.int64) - ans_feat = tf.nn.embedding_lookup(word_vecs, ans_in_vocab_ids_tensor, - name='ans_feat') - return ans_feat - - def compute_cosine_dist(self, feat1, feat2): - feat1 = tf.nn.l2_normalize(feat1, 1) - feat2 = tf.nn.l2_normalize(feat2, 1) - return tf.matmul(feat1, tf.transpose(feat2)) - + def softmax_layer(self, feats, batch_size, num_proposals): + feat_dim = feats.get_shape()[1].value + with tf.name_scope('softmax_layer') as softmax_layer: + W = weight_variable([feat_dim, 1]) + b = bias_variable([1]) + + vec_logits = tf.add(tf.matmul(feats, W), b, + name='vec_logits') + + logits = tf.reshape(vec_logits, + [batch_size, num_proposals]) + + y_pred = tf.nn.softmax(logits, name='softmax') + + 
tf.add_to_collection('regularize', W) + + return y_pred diff --git a/classifiers/rel_graph_creator2.py b/classifiers/rel_graph_creator2.py new file mode 100644 index 0000000000000000000000000000000000000000..a17f4491c46269711b89b51fff343dc52537baf9 --- /dev/null +++ b/classifiers/rel_graph_creator2.py @@ -0,0 +1,186 @@ +import numpy as np +import math +import pdb +import tensorflow as tf +import answer_classifier.ans_data_io_helper as ans_io_helper +from tf_graph_creation_helper import weight_variable, bias_variable, \ + q_bin_embed_graph, conv2d, max_pool_2x2, batchnorm, explicit_feat_graph + + +class rel_graph_creator(): + def __init__(self, + plholder_dict, + keep_prob, + obj_feat, + atr_feat, + obj_prob, + atr_prob, + vocab_size, + batch_size, + graph_config, + mode='q_obj_atr', + is_train=True): + + self.mode = mode + self.is_train = plholder_dict['is_train'] + self.keep_prob = keep_prob + image_regions = plholder_dict['image_regions'] + + with tf.name_scope('rel') as rel_graph: + # Word Vectors + word_vecs = self.create_word_vecs(vocab_size, + graph_config['word_vec_dim']) + + # Feature Computations + q_feat = self.add_q_feat_comp_layer(word_vecs, plholder_dict) + reg_feat = self.add_reg_feat_comp_layer(image_regions) + explt_feat = self.add_explt_feat_comp_layer(obj_prob, + atr_prob, + plholder_dict) + + # Feature Projections (with batch norm) + feat_proj_dim = graph_config['joint_embed_dim'] + proj_feat = dict() + + proj_feat['q'] = self.feat_proj_layer(q_feat, feat_proj_dim, + 'q_feat_proj_layer') + + proj_feat['reg'] = self.feat_proj_layer(reg_feat, feat_proj_dim, + 'reg_feat_proj_layer') + + proj_feat['obj'] = self.feat_proj_layer(obj_feat, feat_proj_dim, + 'obj_feat_proj_layer') + + proj_feat['atr'] = self.feat_proj_layer(atr_feat, feat_proj_dim, + 'atr_feat_proj_layer') + + # Feature Combination + coeffs = self.mixing_coeffs() + print coeffs + num_regions = batch_size*ans_io_helper.num_proposals + comb_feat = tf.zeros(shape=[num_regions, 
feat_proj_dim], + dtype=tf.float32) + for feat_type, feat in proj_feat.items(): + comb_feat = comb_feat + feat * coeffs[feat_type] + + bn_comb_feat = batchnorm(comb_feat, None, self.is_train) + bn_explt_feat = batchnorm(explt_feat, None, self.is_train) + + # Softmax scores + final_feat = tf.concat(1, [bn_comb_feat, bn_explt_feat]) + self.rel_score = self.softmax_layer(tf.nn.relu(final_feat), + batch_size, + ans_io_helper.num_proposals) + + def create_word_vecs(self, vocab_size, word_vec_dim): + word_vecs = weight_variable([vocab_size, + word_vec_dim], + var_name='word_vecs') + word_vecs = tf.nn.l2_normalize(word_vecs, 1) + tf.add_to_collection('regularize',word_vecs) + return word_vecs + + def add_q_feat_comp_layer(self, word_vecs, plholder_dict): + with tf.name_scope('q_feat_comp_layer') as q_feat_comp_layer: + bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict) + bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict) + bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict) + bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict) + q_feat = tf.concat(1, + [bin0_embed, bin1_embed, bin2_embed, bin3_embed], + name='q_feat') + return q_feat + + def add_reg_feat_comp_layer(self, image_regions): + with tf.name_scope('reg_feat_comp_layer') as reg_feat_comp_layer: + with tf.name_scope('conv1') as conv1: + W_conv1 = weight_variable([5,5,3,4]) + b_conv1 = bias_variable([4]) + a_conv1 = tf.add(conv2d(image_regions, W_conv1), + b_conv1, name='a') + h_conv1 = tf.nn.relu(a_conv1, name='h') + h_pool1 = max_pool_2x2(h_conv1) + h_conv1_drop = tf.nn.dropout(h_pool1, self.keep_prob, + name='h_pool_drop') + + with tf.name_scope('conv2') as conv2: + W_conv2 = weight_variable([3,3,4,8]) + b_conv2 = bias_variable([8]) + a_conv2 = tf.add(conv2d(h_pool1, W_conv2), b_conv2, name='a') + h_conv2 = tf.nn.relu(a_conv2, name='h') + h_pool2 = max_pool_2x2(h_conv2) + h_pool2_drop = tf.nn.dropout(h_pool2, self.keep_prob, + name='h_pool_drop') + 
h_pool2_drop_shape = h_pool2_drop.get_shape() + reg_feat_dim = reduce(lambda f, g: f*g, + [dim.value for dim in + h_pool2_drop_shape[1:]]) + reg_feat = tf.reshape(h_pool2_drop, [-1, reg_feat_dim], + name='reg_feat') + + tf.add_to_collection('regularize', W_conv1) + tf.add_to_collection('regularize', W_conv2) + + return reg_feat + + def add_explt_feat_comp_layer(self, obj_prob, atr_prob, plholder_dict): + with tf.name_scope('explicit_feat') as expl_feat: + explt_feat_list = [] + for bin_num in xrange(4): + bin_name = 'bin'+ str(bin_num) + explt_feat_list.append(explicit_feat_graph(bin_name, obj_prob, + 'obj', plholder_dict)) + explt_feat_list.append(explicit_feat_graph(bin_name, atr_prob, + 'atr', plholder_dict)) + + concat_explt_feat = tf.concat(1, explt_feat_list, + name = 'concat_explt_feat') + + return concat_explt_feat + + def feat_proj_layer(self, feat, proj_dim, name_scope): + with tf.name_scope(name_scope) as fc_layer: + feat_dim = feat.get_shape()[1].value + W1 = weight_variable([feat_dim, proj_dim]) + b1 = bias_variable([proj_dim]) + proj_feat1 = tf.add(tf.matmul(feat, W1), b1) + bn_proj_feat1 = batchnorm(proj_feat1, None, self.is_train) + W2 = weight_variable([proj_dim, proj_dim]) + b2 = bias_variable([proj_dim]) + bn_proj_feat2 = tf.add(tf.matmul(tf.nn.relu(bn_proj_feat1), W2), b2) + + tf.add_to_collection('regularize', W1) + tf.add_to_collection('regularize', W2) + + return bn_proj_feat2 + + def mixing_coeffs(self): + feat_types = ['q', 'obj', 'atr', 'reg'] + coeffs = dict() + count = 0; + for feat_type in feat_types: + if feat_type in self.mode: + coeffs[feat_type] = 1.0 + else: + coeffs[feat_type] = 0.0 + count += coeffs[feat_type] + coeffs = {k: v/count for k, v in coeffs.items()} + return coeffs + + def softmax_layer(self, feats, batch_size, num_proposals): + feat_dim = feats.get_shape()[1].value + with tf.name_scope('softmax_layer') as softmax_layer: + W = weight_variable([feat_dim, 1]) + b = bias_variable([1]) + + vec_logits = 
tf.add(tf.matmul(feats, W), b, + name='vec_logits') + + logits = tf.reshape(vec_logits, + [batch_size, num_proposals]) + + y_pred = tf.nn.softmax(logits, name='softmax') + + tf.add_to_collection('regularize', W) + + return y_pred diff --git a/classifiers/tf_graph_creation_helper.py b/classifiers/tf_graph_creation_helper.py index 9b2d36b1c732340ac0349bbf60881edeea933c32..8419909c7346dbb95ffc309c540f2c0fe8481d2b 100644 --- a/classifiers/tf_graph_creation_helper.py +++ b/classifiers/tf_graph_creation_helper.py @@ -75,6 +75,7 @@ def placeholder_inputs_rel(num_proposals, total_vocab_size, mode = 'gt'): 'image_regions': tf.placeholder(tf.float32, [None,25,25,3], 'image_regions'), 'keep_prob': tf.placeholder(tf.float32, name='keep_prob'), + 'is_train': tf.placeholder(tf.bool, [], 'is_train'), } for i in xrange(4): bin_name = 'bin' + str(i) diff --git a/classifiers/train_classifiers.py b/classifiers/train_classifiers.py index 46c1acd00521f8ab9bc6ed0e8ec6585a5d033cdc..52d67185dd75b6461a2245a006d72a95438ad743 100644 --- a/classifiers/train_classifiers.py +++ b/classifiers/train_classifiers.py @@ -12,8 +12,10 @@ import attribute_classifiers.train_atr_classifier as atr_trainer import attribute_classifiers.eval_atr_classifier as atr_evaluator #import answer_classifier.train_ans_classifier as ans_trainer import answer_classifier.train_ans_classifier_simple as ans_trainer -import region_ranker.train_rel_classifier as rel_trainer -import region_ranker.eval_rel_classifier as rel_evaluator +#import region_ranker.train_rel_classifier as rel_trainer +import region_ranker.train_rel_classifier_simple as rel_trainer +#import region_ranker.eval_rel_classifier as rel_evaluator +import region_ranker.eval_rel_classifier_simple as rel_evaluator workflow = { 'train_obj': False, @@ -70,15 +72,15 @@ rel_classifier_train_params = { 'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json', 'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images', 
     'image_regions_dir': '/mnt/ramdisk/image_regions',
-    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
+    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt_At_End',
     'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
     'mode': 'q_obj_atr_reg_explt',
-    'adam_lr' : 0.0001,
+    'adam_lr' : 0.001,
     'crop_n_save_regions': False,
-    'max_epoch': 10,
+    'max_epoch': 5,
     'batch_size': 10,
-    'fine_tune': True,
-    'start_model': 4, # Used only if fine_tune is True
+    'fine_tune': False,
+    'start_model': 0, # Used only if fine_tune is True
 }
 
 rel_classifier_eval_params = {
@@ -88,9 +90,9 @@ rel_classifier_eval_params = {
     'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
     'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
     'image_regions_dir': '/mnt/ramdisk/image_regions',
-    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
-    'model_basedir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
-    'model_number': 9,
+    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt_At_End',
+    'model_basedir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt_At_End',
+    'model_number': 4,
     'mode': 'q_obj_atr_reg_explt',
     'batch_size': 20,
     'test_start_id': 94645,
@@ -105,15 +107,15 @@ ans_classifier_train_params = {
     'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
     'image_regions_dir': '/mnt/ramdisk/image_regions',
     'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin',
-    'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9',
+    'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt_At_End/rel_classifier_q_obj_atr_reg_explt-4',
     'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
-    'adam_lr' : 0.0001,
-    'mode' : 'q_obj_atr',
+    'adam_lr' : 0.001,
+    'mode' : 'q_obj_atr_reg',
     'crop_n_save_regions': False,
     'max_epoch': 5,
     'batch_size': 10,
-    'fine_tune': True,
-    'start_model': 1, # When fine_tune is false used to pre-initialize q_obj_atr with q model etc
+    'fine_tune': False,
+    'start_model': 0, # When fine_tune is false used to pre-initialize q_obj_atr with q model etc
 }
 
 if __name__=='__main__':