diff --git a/classifiers/answer_classifier/eval_ans_classifier.py b/classifiers/answer_classifier/eval_ans_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..35eb6578875181654e45d90bf3b95cf67062c824 --- /dev/null +++ b/classifiers/answer_classifier/eval_ans_classifier.py @@ -0,0 +1,172 @@ +import sys +import os +import json +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +import numpy as np +import math +import pdb +import tensorflow as tf +import tf_graph_creation_helper as graph_creator +import plot_helper as plotter +import ans_data_io_helper as ans_io_helper +import region_ranker.perfect_ranker as region_proposer +import train_ans_classifier as ans_trainer + +def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab, + image_dir, mean_image, start_index, val_set_size, batch_size, + placeholders, img_height=100, img_width=100): + + inv_ans_vocab = {v: k for k, v in ans_vocab.items()} + pred_list = [] + correct = 0 + max_iter = int(math.ceil(val_set_size*1.0/batch_size)) +# print ([val_set_size, batch_size]) +# print('max_iter: ' + str(max_iter)) + batch_size_tmp = batch_size + for i in xrange(max_iter): + if i==(max_iter-1): + batch_size_tmp = val_set_size - i*batch_size + print('Iter: ' + str(i+1) + '/' + str(max_iter)) +# print batch_size_tmp + region_images, ans_labels, questions, \ + region_score, partition= \ + ans_io_helper.ans_mini_batch_loader(qa_anno_dict, + region_anno_dict, + ans_vocab, vocab, + image_dir, mean_image, + start_index+i*batch_size, + batch_size_tmp, + img_height, img_width, 3) + + # print [start_index+i*batch_size, + # start_index+i*batch_size + batch_size_tmp -1] + if i==max_iter-1: + + residual_batch_size = batch_size - batch_size_tmp + residual_regions = residual_batch_size*ans_io_helper.num_proposals + + residual_region_images = np.zeros(shape=[residual_regions, + img_height/3, img_width/3, + 3]) + residual_questions = np.zeros(shape=[residual_regions, + len(vocab)]) + residual_ans_labels = np.zeros(shape=[residual_batch_size, + len(ans_vocab)]) + residual_region_score = np.zeros(shape=[1, residual_regions]) + + region_images = np.concatenate((region_images, + residual_region_images), + axis=0) + questions = np.concatenate((questions, residual_questions), axis=0) + ans_labels = np.concatenate((ans_labels, residual_ans_labels), + axis=0) + region_score = np.concatenate((region_score, residual_region_score), + axis=1) + # print region_images.shape + # print questions.shape + # print ans_labels.shape + # print region_score.shape + + feed_dict = { + placeholders[0] : region_images, + placeholders[1] : questions, + placeholders[2] : 1.0, + placeholders[3] : ans_labels, + placeholders[4] : region_score, + } + + ans_ids = np.argmax(y.eval(feed_dict), 1) + for j in xrange(batch_size_tmp): + pred_list = pred_list + [{ + 'question_id' : start_index+i*batch_size+j, + 'answer' : inv_ans_vocab[ans_ids[j]] + }] + # print qa_anno_dict[start_index+i*batch_size+j].question + # print inv_ans_vocab[ans_ids[j]] + + return pred_list + +def eval(eval_params): + sess = tf.InteractiveSession() + + train_anno_filename = eval_params['train_json'] + test_anno_filename = eval_params['test_json'] + regions_anno_filename = eval_params['regions_json'] + image_regions_dir = eval_params['image_regions_dir'] + outdir = eval_params['outdir'] + model = eval_params['model'] + batch_size = eval_params['batch_size'] + test_start_id = eval_params['test_start_id'] + test_set_size = eval_params['test_set_size'] + if not 
os.path.exists(outdir):
+        os.mkdir(outdir)
+
+    qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
+    qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
+    region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
+    ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
+    vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)
+
+    # Create graph
+    g = tf.get_default_graph()
+    image_regions, questions, keep_prob, y, region_score = \
+        graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab),
+                                             mode='gt')
+    y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
+    obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
+    obj_feat = obj_feat_op.outputs[0]
+    y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
+    atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
+    atr_feat = atr_feat_op.outputs[0]
+
+    y_pred = graph_creator.ans_comp_graph(image_regions, questions, keep_prob,
+                                          obj_feat, atr_feat, vocab,
+                                          inv_vocab, len(ans_vocab),
+                                          eval_params['mode'])
+    y_avg = graph_creator.aggregate_y_pred(y_pred, region_score, batch_size,
+                                           ans_io_helper.num_proposals,
+                                           len(ans_vocab))
+
+    cross_entropy = graph_creator.loss(y, y_avg)
+    accuracy = graph_creator.evaluation(y, y_avg)
+
+    # Restore model; fail fast rather than evaluating an uninitialized graph
+    saver = tf.train.Saver()
+    if os.path.exists(model):
+        saver.restore(sess, model)
+    else:
+        raise IOError('Failed to read model from file ' + model)
+
+    mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
+                         'Obj_Classifier/mean_image.npy')
+
+    placeholders = [image_regions, questions, keep_prob, y, region_score]
+
+    # Get predictions
+    pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
+                         image_regions_dir, mean_image, test_start_id,
+                         test_set_size, batch_size, placeholders, 75, 75)
+
+    json_filename = os.path.join(outdir, 'predicted_ans_' + \
+                                 eval_params['mode'] + '.json')
+    with open(json_filename,'w') as json_file:
+        json.dump(pred_dict, json_file)
+
+
+
+if __name__=='__main__':
+    ans_classifier_eval_params = {
+        'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
+        'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
+        'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
+        'image_regions_dir': '/mnt/ramdisk/image_regions',
+        'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier',
+        'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier/ans_classifier_q_obj_atr-9',
+        'mode' : 'q_obj_atr',
+        'batch_size': 20,
+        'test_start_id': 111352, #+48600,
+        'test_set_size': 160725-111352+1,
+    }
+
+    eval(ans_classifier_eval_params)
diff --git a/classifiers/answer_classifier/train_ans_classifier.py b/classifiers/answer_classifier/train_ans_classifier.py
index a4dfddec88da3b593d79ebcb144e0cccdfa78528..524d294c4184be372c38958552e87f4883ce163a 100644
--- a/classifiers/answer_classifier/train_ans_classifier.py
+++ b/classifiers/answer_classifier/train_ans_classifier.py
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
 import matplotlib.image as mpimg
 import numpy as np
 import math
+import random
 import pdb
 import tensorflow as tf
 import object_classifiers.obj_data_io_helper as obj_data_loader
@@ -14,18 +15,17 @@ import plot_helper as plotter
 import ans_data_io_helper as ans_io_helper
 import region_ranker.perfect_ranker as region_proposer
 import time
+
 val_start_id = 106115
-val_batch_size = 5000
-val_batch_size_small = 100
-batch_size = 20
-crop_n_save_regions = False
-restore_intermediate_model = True +val_set_size = 5000 +val_set_size_small = 100 + def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab, - image_dir, mean_image, start_index, val_batch_size, + image_dir, mean_image, start_index, val_set_size, batch_size, placeholders, img_height=100, img_width=100): correct = 0 - max_iter = int(math.floor(val_batch_size/batch_size)) + max_iter = int(math.floor(val_set_size/batch_size)) for i in xrange(max_iter): region_images, ans_labels, questions, \ region_score, partition= \ @@ -53,24 +53,15 @@ def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab, def train(train_params): sess = tf.InteractiveSession() - train_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \ - 'shapes_dataset/train_anno.json' - - test_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \ - 'shapes_dataset/test_anno.json' - - regions_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \ - 'shapes_dataset/regions_anno.json' - - image_dir = '/home/tanmay/Code/GenVQA/GenVQA/' + \ - 'shapes_dataset/images' + train_anno_filename = train_params['train_json'] + test_anno_filename = train_params['test_json'] + regions_anno_filename = train_params['regions_json'] + image_dir = train_params['image_dir'] + image_regions_dir = train_params['image_regions_dir'] + outdir = train_params['outdir'] + obj_atr_model = train_params['obj_atr_model'] + batch_size = train_params['batch_size'] - # image_regions_dir = '/home/tanmay/Code/GenVQA/Exp_Results/' + \ - # 'image_regions' - - image_regions_dir = '/mnt/ramdisk/image_regions' - - outdir = '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier' if not os.path.exists(outdir): os.mkdir(outdir) @@ -80,7 +71,7 @@ def train(train_params): vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict) # Save region crops - if crop_n_save_regions == True: + if train_params['crop_n_save_regions'] == True: qa_anno_dict_test = ans_io_helper.parse_qa_anno(test_anno_filename) ans_io_helper.save_regions(image_dir, image_regions_dir, qa_anno_dict, region_anno_dict, @@ -91,24 +82,27 @@ def train(train_params): # Create graph + g = tf.get_default_graph() image_regions, questions, keep_prob, y, region_score= \ graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab), mode='gt') y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0) - obj_feat = tf.get_collection('obj_feat', scope='obj/conv2') - y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat[0]) - atr_feat = tf.get_collection('atr_feat', scope='atr/conv2') - - # model restoration -# obj_atr_saver = tf.train.Saver() - model_to_restore = '/home/tanmay/Code/GenVQA/GenVQA/classifiers/' + \ - 'saved_models/obj_atr_classifier-1' -# obj_atr_saver.restore(sess, model_to_restore) - - y_pred, logits = graph_creator.ans_comp_graph(image_regions, - questions, keep_prob, \ - obj_feat[0], atr_feat[0], - vocab, inv_vocab, len(ans_vocab)) + obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat') + obj_feat = obj_feat_op.outputs[0] + y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat) + atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat') + atr_feat = atr_feat_op.outputs[0] + + # Restore obj and attribute classifier parameters + obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj') + atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') + obj_atr_saver = tf.train.Saver(obj_vars+atr_vars) + obj_atr_saver.restore(sess, obj_atr_model) + + y_pred = graph_creator.ans_comp_graph(image_regions, questions, keep_prob, + 
obj_feat, atr_feat, vocab,
+                                          inv_vocab, len(ans_vocab),
+                                          train_params['mode'])
     y_avg = graph_creator.aggregate_y_pred(y_pred, region_score, batch_size,
                                            ans_io_helper.num_proposals,
                                            len(ans_vocab))
@@ -117,66 +111,137 @@ def train(train_params):
     accuracy = graph_creator.evaluation(y, y_avg)
 
     # Collect variables
-    vars_to_opt = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
-
-    train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
-                         .minimize(cross_entropy, var_list=vars_to_opt)
-
-    word_embed = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans/word_embed')
-    vars_to_restore = \
-        tf.get_collection(tf.GraphKeys.VARIABLES,scope='obj') + \
-        tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') + \
-        [word_embed[0]]
+    ans_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
+    list_of_vars = [
+        'ans/word_embed/word_vecs',
+        'ans/fc1/W_region',
+        'ans/fc1/W_obj',
+        'ans/fc1/W_atr',
+        'ans/fc1/W_q',
+        'ans/fc1/b',
+        'ans/fc2/W',
+        'ans/fc2/b'
+    ]
+    vars_dict = graph_creator.get_list_of_variables(list_of_vars)
 
-    all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
+    if train_params['mode']=='q':
+        pretrained_vars_high_lr = []
+        pretrained_vars_low_lr = []
+        partial_model = ''
 
-    vars_to_init = [var for var in all_vars if var not in vars_to_restore]
-    vars_to_save = tf.get_collection(tf.GraphKeys.VARIABLES,scope='obj') + \
-                   tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') + \
-                   tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
-
-    print('vars_to_save: ')
-    print([var.name for var in vars_to_save])
-    # Session saver
-
-    saver = tf.train.Saver(vars_to_restore)
-    saver2 = tf.train.Saver(vars_to_save)
-    if restore_intermediate_model==True:
-        intermediate_model = '/home/tanmay/Code/GenVQA/Exp_Results/' + \
-                             'Ans_Classifier/ans_classifier_question_only-9'
-        print('vars_to_restore: ')
-        print([var.name for var in vars_to_restore])
-        print('vars_to_init: ')
-        print([var.name for var in vars_to_init])
-        saver.restore(sess, intermediate_model)
-#        print('Initializing variables')
-        sess.run(tf.initialize_variables(vars_to_init))
-        start_epoch = 0
+    elif train_params['mode']=='q_obj_atr' or \
+         train_params['mode']=='q_reg':
+
+        pretrained_vars_low_lr = [
+            vars_dict['ans/word_embed/word_vecs'],
+        ]
+        pretrained_vars_high_lr = [
+            vars_dict['ans/fc1/W_q'],
+            vars_dict['ans/fc1/b'],
+            vars_dict['ans/fc2/W'],
+            vars_dict['ans/fc2/b']
+        ]
+        partial_model = os.path.join(outdir, 'ans_classifier_q-' + \
+                                     str(train_params['start_model']))
+
+    elif train_params['mode']=='q_obj_atr_reg':
+        pretrained_vars_low_lr = [
+            vars_dict['ans/word_embed/word_vecs'],
+            vars_dict['ans/fc1/W_q'],
+            vars_dict['ans/fc1/W_obj'],
+            vars_dict['ans/fc1/W_atr'],
+            vars_dict['ans/fc1/b'],
+        ]
+        pretrained_vars_high_lr = [
+            vars_dict['ans/fc2/W'],
+            vars_dict['ans/fc2/b']
+        ]
+        partial_model = os.path.join(outdir, 'ans_classifier_q_obj_atr-' + \
+                                     str(train_params['start_model']))
+
+    # Fine tune beginning with a previous model
+    if train_params['fine_tune']==True:
+        partial_model = os.path.join(outdir, 'ans_classifier_' + \
+                                     train_params['mode'] + '-' + \
+                                     str(train_params['start_model']))
+        start_epoch = train_params['start_model']+1
     else:
-        # Initializing all variables except those restored
-        print('Initializing variables')
-        sess.run(tf.initialize_variables(vars_to_init))
         start_epoch = 0
 
+    # Restore partial model
+    vars_to_save = obj_vars + atr_vars + ans_vars
+    partial_saver = tf.train.Saver(vars_to_save)
+    if os.path.exists(partial_model):
+        partial_saver.restore(sess, partial_model)
+
+    # 
Variables to train from scratch + all_pretrained_vars = pretrained_vars_low_lr + pretrained_vars_high_lr + vars_to_train_from_scratch = \ + [var for var in ans_vars if var not in all_pretrained_vars] + + # Attach optimization ops + train_step_high_lr = tf.train.AdamOptimizer(train_params['adam_high_lr']) \ + .minimize(cross_entropy, + var_list=vars_to_train_from_scratch + + pretrained_vars_high_lr) + print('Parameters trained with high lr (' + + str(train_params['adam_high_lr']) + '): ') + print([var.name for var in vars_to_train_from_scratch + + pretrained_vars_high_lr]) + + if pretrained_vars_low_lr: + train_step_low_lr = tf.train \ + .AdamOptimizer(train_params['adam_low_lr']) \ + .minimize(cross_entropy, + var_list=pretrained_vars_low_lr) + print('Parameters trained with low lr(' + + str(train_params['adam_low_lr']) + '): ') + print([var.name for var in pretrained_vars_low_lr]) + + all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) + + if train_params['fine_tune']==False: + vars_to_init = [var for var in all_vars if var not in + obj_vars + atr_vars + all_pretrained_vars] + else: + vars_to_init = [var for var in all_vars if var not in vars_to_save] + + # Initialize vars_to_init + sess.run(tf.initialize_variables(vars_to_init)) + + print('All pretrained variables: ') + print([var.name for var in all_pretrained_vars]) + print('Variables to train from scratch: ') + print([var.name for var in vars_to_train_from_scratch]) + print('Variables to initialize randomly: ') + print([var.name for var in vars_to_init]) + print('Variables to save: ') + print([var.name for var in vars_to_save]) + # Load mean image mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \ 'Obj_Classifier/mean_image.npy') placeholders = [image_regions, questions, keep_prob, y, region_score] - - # Variables to observe - W_fc2 = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans/fc2/W') - q_feat = tf.get_collection('q_feat', scope='ans/q_embed') + + if train_params['fine_tune']==True: + restored_accuracy = evaluate(accuracy, qa_anno_dict, + region_anno_dict, ans_vocab, + vocab, image_regions_dir, + mean_image, val_start_id, + val_set_size, batch_size, + placeholders, 75, 75) + print('Accuracy of restored model: ' + str(restored_accuracy)) # Start Training -# batch_size = 1 - max_epoch = 10 + max_epoch = train_params['max_epoch'] max_iter = 5000 val_acc_array_epoch = np.zeros([max_epoch]) train_acc_array_epoch = np.zeros([max_epoch]) for epoch in range(start_epoch, max_epoch): - start = time.time() - for i in range(max_iter): + iter_ids = range(max_iter) + random.shuffle(iter_ids) + for i in iter_ids: #range(max_iter): train_region_images, train_ans_labels, train_questions, \ train_region_score, train_partition= \ @@ -194,100 +259,63 @@ def train(train_params): region_score: train_region_score, } + if pretrained_vars_low_lr: + _, _, current_train_batch_acc, y_pred_eval, loss_eval = \ + sess.run([train_step_low_lr, train_step_high_lr, + accuracy, y_pred, cross_entropy], + feed_dict=feed_dict_train) + else: + _, current_train_batch_acc, y_pred_eval, loss_eval = \ + sess.run([train_step_high_lr, accuracy, + y_pred, cross_entropy], + feed_dict=feed_dict_train) + + assert (not np.any(np.isnan(y_pred_eval))), 'NaN predicted' - try: - assert (not np.any(np.isnan(q_feat[0].eval(feed_dict_train)))) - except AssertionError: - print('NaN in q_feat') - print(1+i*batch_size) - print(train_questions) - print(logits.eval(feed_dict_train)) - print(cross_entropy.eval(feed_dict_train)) - exit(1) - - start1 = time.time() - _, 
current_train_batch_acc, y_avg_eval, y_pred_eval, logits_eval, W_fc2_eval = \ - sess.run([train_step, accuracy, y_avg, y_pred, logits, W_fc2[0]], - feed_dict=feed_dict_train) - end1 = time.time() - # print('Training Pass: ' + str(end1-start1)) train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + \ current_train_batch_acc - # pdb.set_trace() - - - try: - assert (not np.any(np.isnan(W_fc2_eval))) - except AssertionError: - print('NaN in W_fc2') - print(1+i*batch_size) - print(W_fc2_eval) - exit(1) - try: - assert (not np.any(np.isnan(logits_eval))) - except AssertionError: - print('NaN in logits') - print(1+i*batch_size) - print(y_avg_eval) - exit(1) - - try: - assert (not np.any(np.isnan(y_avg_eval))) - except AssertionError: - print('NaN in y_avg') - print(1+i*batch_size) - print(logits_eval) - print(y_avg_eval) - exit(1) - if (i+1)%500==0: - print(logits_eval[0:22,:]) - print(train_region_score[0,0:22]) - print(train_ans_labels[0,:]) -# print(train_ans_labels[0,:]) - print(y_avg_eval[0,:]) -# print(y_pred_eval) val_accuracy = evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab, image_regions_dir, mean_image, - val_start_id, val_batch_size_small, - placeholders, 75, 75) + val_start_id, val_set_size_small, + batch_size, placeholders, 75, 75) - print('Iter: ' + str(i+1) + ' Val Sm Acc: ' + str(val_accuracy)) - - end = time.time() - print('Per Iter Time: ' + str(end-start)) + print('Iter: ' + str(i+1) + ' Val Sm Acc: ' + str(val_accuracy) + + ' Loss: ' + str(loss_eval)) train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] / max_iter - start = time.time() val_acc_array_epoch[epoch] = evaluate(accuracy, qa_anno_dict, - region_anno_dict, ans_vocab, vocab, - image_regions_dir, mean_image, - val_start_id, val_batch_size, + region_anno_dict, ans_vocab, + vocab, image_regions_dir, + mean_image, val_start_id, + val_set_size, batch_size, placeholders, 75, 75) - end=time.time() - print('Per Validation Time: ' + str(end-start)) + print('Val Acc: ' + str(val_acc_array_epoch[epoch]) + ' Train Acc: ' + str(train_acc_array_epoch[epoch])) - plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1, + if train_params['fine_tune']==True: + plot_path = os.path.join(outdir, 'acc_vs_epoch_' \ + + train_params['mode'] + '_fine_tuned.pdf') + else: + plot_path = os.path.join(outdir, 'acc_vs_epoch_' \ + + train_params['mode'] + '.pdf') + + plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1, ydata_train=train_acc_array_epoch[0:epoch + 1], ydata_val=val_acc_array_epoch[0:epoch + 1], xlim=[1, max_epoch], ylim=[0, 1.0], - savePath=os.path.join(outdir, - 'acc_vs_epoch_q_o_atr.pdf')) + savePath=plot_path) - save_path = saver2.save(sess, - os.path.join(outdir,'ans_classifier_question_obj_atr'), - global_step=epoch) + save_path = partial_saver \ + .save(sess, os.path.join(outdir, 'ans_classifier_' + \ + train_params['mode']), global_step=epoch) sess.close() tf.reset_default_graph() if __name__=='__main__': - train_params = { - 'adam_lr' : 0.0001, - } - train(train_params) + print 'Hello' diff --git a/classifiers/answer_classifier/train_ans_classifier.pyc b/classifiers/answer_classifier/train_ans_classifier.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a476b58d08ede1221bca8d1d9e83afa1f34f095 Binary files /dev/null and b/classifiers/answer_classifier/train_ans_classifier.pyc differ diff --git a/classifiers/tf_graph_creation_helper.py b/classifiers/tf_graph_creation_helper.py index 
aae6a2370ed6c8270d98c59b27b2d84b20b6d2e1..db4917528627a299c27a2ef4fa297d7a359d5cf5 100644 --- a/classifiers/tf_graph_creation_helper.py +++ b/classifiers/tf_graph_creation_helper.py @@ -8,13 +8,29 @@ graph_config = { 'num_attributes': 4, 'obj_feat_dim': 392, 'atr_feat_dim': 392, + 'region_feat_dim': 392, + 'word_vec_dim': 50, + 'ans_fc1_dim': 300, } +def get_variable(var_scope): + var_list = tf.get_collection(tf.GraphKeys.VARIABLES, scope=var_scope) + assert len(var_list)==1, 'Multiple variables exist by that name' + return var_list[0] + + +def get_list_of_variables(var_scope_list): + var_dict = dict() + for var_scope in var_scope_list: + var_dict[var_scope] = get_variable(var_scope) + return var_dict + + def weight_variable(tensor_shape, fan_in=None, var_name='W'): if fan_in==None: fan_in = reduce(lambda x, y: x*y, tensor_shape[0:-1]) + stddev = math.sqrt(2.0/fan_in) - print(stddev) initial = tf.truncated_normal(shape=tensor_shape, mean=0.0, stddev=stddev) return tf.Variable(initial_value=initial, name=var_name) @@ -94,7 +110,7 @@ def obj_comp_graph(x, keep_prob): logits = tf.add(tf.matmul(obj_feat, W_fc1), b_fc1, name='logits') y_pred = tf.nn.softmax(logits, name='softmax') -# tf.add_to_collection('obj_feat', h_pool2_drop_flat) + return y_pred @@ -140,73 +156,110 @@ def atr_comp_graph(x, keep_prob, obj_feat): logits = 0.5*logits_atr + 0.5*logits_obj + b_fc1 y_pred = tf.nn.softmax(logits, name='softmax') -# tf.add_to_collection('atr_feat', h_pool2_drop_flat) + return y_pred -def ans_comp_graph(image_regions, questions, keep_prob, \ - obj_feat, atr_feat, vocab, inv_vocab, ans_vocab_size): +def ans_comp_graph(image_regions, questions, keep_prob, obj_feat, atr_feat, + vocab, inv_vocab, ans_vocab_size, mode): + vocab_size = len(vocab) with tf.name_scope('ans') as ans_graph: + with tf.name_scope('word_embed') as word_embed: - initial = tf.truncated_normal(shape=[len(vocab),50], - stddev=math.sqrt(3.0/(31.0+300.0))) - word_vecs = tf.Variable(initial, name='word_vecs') - with tf.name_scope('q_embed') as q_embed: - q_feat = tf.matmul(questions, word_vecs) - # q_feat = tf.truediv(q_feat, tf.cast(len(vocab),tf.float32)) - # q_feat = tf.truediv(q_feat, tf.reduce_sum(questions,1,keep_dims=True)) + word_vecs = weight_variable([vocab_size, + graph_config['word_vec_dim']], + var_name='word_vecs') + q_feat = tf.matmul(questions, word_vecs, name='q_feat') with tf.name_scope('conv1') as conv1: + W_conv1 = weight_variable([5,5,3,4]) b_conv1 = bias_variable([4]) - h_conv1 = tf.nn.relu(conv2d(image_regions, W_conv1) + b_conv1, name='h') + a_conv1 = tf.add(conv2d(image_regions, W_conv1), b_conv1, name='a') + h_conv1 = tf.nn.relu(a_conv1, name='h') h_pool1 = max_pool_2x2(h_conv1) h_conv1_drop = tf.nn.dropout(h_pool1, keep_prob, name='h_pool_drop') with tf.name_scope('conv2') as conv2: + W_conv2 = weight_variable([3,3,4,8]) b_conv2 = bias_variable([8]) - h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h') + a_conv2 = tf.add(conv2d(h_pool1, W_conv2), b_conv2, name='a') + h_conv2 = tf.nn.relu(a_conv2, name='h') h_pool2 = max_pool_2x2(h_conv2) h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob, name='h_pool_drop') - h_pool2_drop_flat = tf.reshape(h_pool2_drop, [-1, 392], name='h_pool_drop_flat') + h_pool2_drop_shape = h_pool2_drop.get_shape() + region_feat_dim = reduce(lambda f, g: f*g, + [dim.value for dim in h_pool2_drop_shape[1:]]) + region_feat = tf.reshape(h_pool2_drop, [-1, region_feat_dim], + name='region_feat') + + print('Region feature dimension: ' + str(region_feat_dim)) #392 with 
tf.name_scope('fc1') as fc1:
-            fc1_dim = 300
-            W_region_fc1 = weight_variable([392, fc1_dim], var_name='W_region')
-            W_obj_fc1 = weight_variable([392, fc1_dim], var_name='W_obj',
-                                        std=math.sqrt(3.0/(2.0*392.0+50.0+ans_vocab_size)))
-            W_atr_fc1 = weight_variable([392, fc1_dim], var_name='W_atr',
-                                        std=math.sqrt(3.0/(2.0*392.0+50.0+ans_vocab_size)))
-            W_q_fc1 = weight_variable([50, fc1_dim], var_name='W_q',
-                                      std=math.sqrt(3.0/(50.0+ans_vocab_size)))
+
+            fc1_dim = graph_config['ans_fc1_dim']
+            W_region_fc1 = weight_variable([graph_config['region_feat_dim'],
+                                            fc1_dim], var_name='W_region')
+            W_obj_fc1 = weight_variable([graph_config['obj_feat_dim'],
+                                         fc1_dim], var_name='W_obj')
+            W_atr_fc1 = weight_variable([graph_config['atr_feat_dim'],
+                                         fc1_dim], var_name='W_atr')
+            W_q_fc1 = weight_variable([graph_config['word_vec_dim'],
+                                       fc1_dim], var_name='W_q')
             b_fc1 = bias_variable([fc1_dim])
-            h_tmp = tf.matmul(q_feat, W_q_fc1) + b_fc1 + \
-                    tf.matmul(obj_feat, W_obj_fc1) + \
-                    tf.matmul(atr_feat, W_atr_fc1)
-                    #tf.matmul(h_pool2_drop_flat, W_region_fc1) + \
+            a_fc1_region = tf.matmul(region_feat, W_region_fc1,
+                                     name='a_fc1_region')
+            a_fc1_obj = tf.matmul(obj_feat, W_obj_fc1, name='a_fc1_obj')
+            a_fc1_atr = tf.matmul(atr_feat, W_atr_fc1, name='a_fc1_atr')
+            a_fc1_q = tf.matmul(q_feat, W_q_fc1, name='a_fc1_q')
+
+            coeff_reg = 0.0
+            coeff_obj = 0.0
+            coeff_atr = 0.0
+            coeff_q = 0.0
+
+            if mode=='q':
+                coeff_q = 1.0
+
+            elif mode=='q_reg':
+                coeff_q = 1/2.0
+                coeff_reg = 1/2.0
+
+            elif mode=='q_obj_atr':
+                coeff_q = 1/3.0
+                coeff_obj = 1/3.0
+                coeff_atr = 1/3.0
+
+            elif mode=='q_obj_atr_reg':
+                coeff_q = 1/4.0
+                coeff_obj = 1/4.0
+                coeff_atr = 1/4.0
+                coeff_reg = 1/4.0
+
+            a_fc1 = coeff_reg * a_fc1_region + \
+                    coeff_obj * a_fc1_obj + \
+                    coeff_atr * a_fc1_atr + \
+                    coeff_q * a_fc1_q
 
-            h_fc1 = tf.nn.relu(h_tmp, name='h')
+            h_fc1 = tf.nn.relu(a_fc1, name='h')
             h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
 
         with tf.name_scope('fc2') as fc2:
-            W_fc2 = weight_variable([fc1_dim, ans_vocab_size],
-                                    std=math.sqrt(3.0/(fc1_dim)))
-
+            W_fc2 = weight_variable([fc1_dim, ans_vocab_size])
             b_fc2 = bias_variable([ans_vocab_size])
-            logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
+            logits = tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2, name='logits')
 
-        y_pred = tf.nn.softmax(logits)
+        y_pred = tf.nn.softmax(logits, name='softmax')
 
-        tf.add_to_collection('region_feat', h_pool2_drop_flat)
-        tf.add_to_collection('q_feat', q_feat)
+    return y_pred
 
-        return y_pred, logits
 
-def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals, ans_vocab_size):
+def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals,
+                     ans_vocab_size):
     y_pred_list = tf.split(0, batch_size, y_pred)
     region_score_list = tf.split(1, batch_size, region_score)
     y_avg_list = []
@@ -214,17 +267,23 @@ def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals, ans_vocab_
         y_avg_list.append(tf.matmul(region_score_list[i],y_pred_list[i]))
     y_avg = tf.concat(0, y_avg_list)
     return y_avg
+
 
 def evaluation(y, y_pred):
-    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1), name='correct_prediction')
-    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
+    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1),
+                                  name='correct_prediction')
+    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
+                              name='accuracy')
     return accuracy
 
 
 def loss(y, y_pred):
-    cross_entropy = -tf.reduce_sum(y * tf.log(y_pred), name='cross_entropy')
+    y_pred_clipped = 
tf.clip_by_value(y_pred, 1e-10, 1.0)
+    cross_entropy = -tf.reduce_sum(y * tf.log(y_pred_clipped),
+                                   name='cross_entropy')
     batch_size = tf.shape(y)
     return tf.truediv(cross_entropy, tf.cast(batch_size[0],tf.float32))
 
 
 if __name__ == '__main__':
diff --git a/classifiers/tf_graph_creation_helper.pyc b/classifiers/tf_graph_creation_helper.pyc
index 9b27a5400c59cc016fa82b56436b143641f67169..02d92a889c316357acd64391630440f3d31982be 100644
Binary files a/classifiers/tf_graph_creation_helper.pyc and b/classifiers/tf_graph_creation_helper.pyc differ
diff --git a/classifiers/train_classifiers.py b/classifiers/train_classifiers.py
index 2fa365ee5660d01698d94a308ab9887318f11d68..3bf5366d38b963e2b05f2dcabe0ab2ab75ec8797 100644
--- a/classifiers/train_classifiers.py
+++ b/classifiers/train_classifiers.py
@@ -9,14 +9,18 @@ import object_classifiers.train_obj_classifier as obj_trainer
 import object_classifiers.eval_obj_classifier as obj_evaluator
 import attribute_classifiers.train_atr_classifier as atr_trainer
 import attribute_classifiers.eval_atr_classifier as atr_evaluator
+import answer_classifier.train_ans_classifier as ans_trainer
 
 workflow = {
     'train_obj': False,
     'eval_obj': False,
     'train_atr': False,
-    'eval_atr': True,
+    'eval_atr': False,
+    'train_ans': True,
 }
 
+ans_mode = ['q']
+
 obj_classifier_train_params = {
     'out_dir': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier',
     'adam_lr': 0.0001,
@@ -55,6 +59,24 @@ atr_classifier_eval_params = {
     'html_dir': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/html_dir',
 }
 
+ans_classifier_train_params = {
+    'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
+    'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
+    'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
+    'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
+    'image_regions_dir': '/mnt/ramdisk/image_regions',
+    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier',
+    'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
+    'adam_high_lr' : 0.0001,
+    'adam_low_lr' : 0.0000,#1,
+    'mode' : 'q_reg',
+    'crop_n_save_regions': False,
+    'max_epoch': 10,
+    'batch_size': 20,
+    'fine_tune': False,
+    'start_model': 9,
+}
+
 if __name__=='__main__':
     if workflow['train_obj']:
         obj_trainer.train(obj_classifier_train_params)
@@ -67,3 +89,6 @@ if __name__=='__main__':
 
     if workflow['eval_atr']:
         atr_evaluator.eval(atr_classifier_eval_params)
+
+    if workflow['train_ans']:
+        ans_trainer.train(ans_classifier_train_params)
diff --git a/shapes_dataset/evaluate_shapes_test.py b/shapes_dataset/evaluate_shapes_test.py
index 1a53663d31389746a2dbe2756a6e21620c5e8942..f75b659e9260153a8d8db95bff8e58120cb5edce 100644
--- a/shapes_dataset/evaluate_shapes_test.py
+++ b/shapes_dataset/evaluate_shapes_test.py
@@ -7,7 +7,7 @@ if __name__== "__main__":
     res_data = json.load(f_res);
     anno_data = json.load(f_anno);
-
+    print(len(anno_data))
     assert(len(res_data) == len(anno_data))
 
     res_dict = dict()
     # convert to map with qid as key
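
Note on the final-batch handling in get_pred above: the evaluation graph is built for a fixed batch_size, so the last partial batch is zero-padded (region images, questions, labels, and region scores) up to that fixed size, run through the graph, and only the first batch_size_tmp predictions are kept. Below is a minimal sketch of that bookkeeping, separate from the patch itself; the helper name padded_batches is hypothetical and the feature array is a stand-in for the real inputs.

import math
import numpy as np

def padded_batches(set_size, batch_size):
    # Yield (start, real, pad) for fixed-size batches covering set_size items;
    # pad is nonzero only for the final partial batch.
    max_iter = int(math.ceil(set_size * 1.0 / batch_size))
    for i in range(max_iter):
        start = i * batch_size
        real = min(batch_size, set_size - start)
        yield start, real, batch_size - real

# Example: 45 items with batch_size 20 -> (0, 20, 0), (20, 20, 0), (40, 5, 15).
for start, real, pad in padded_batches(45, 20):
    feats = np.random.rand(real, 8)                      # stand-in features
    feats = np.concatenate([feats, np.zeros((pad, 8))])  # zero-pad to fixed size
    # Run the fixed-size graph on feats, then keep only the first `real`
    # rows of its predictions, exactly as get_pred discards padded rows.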