diff --git a/classifiers/answer_classifier/ans_data_io_helper.py b/classifiers/answer_classifier/ans_data_io_helper.py
index 5c55cde21a97e166b11d2a40851b184f336346e6..50317d1ac7cea4161ae88d2e6bc57c2ada1c0372 100644
--- a/classifiers/answer_classifier/ans_data_io_helper.py
+++ b/classifiers/answer_classifier/ans_data_io_helper.py
@@ -117,7 +117,7 @@ def ans_mini_batch_loader(qa_dict, region_anno_dict, ans_dict, vocab,
     region_score = np.zeros(shape=[1,count])
     partition = np.zeros(shape=[count])
     question_encodings = np.zeros(shape=[count, len(vocab)])
-    
+
     for i in xrange(start_index, start_index + batch_size):
         image_id = qa_dict[i].image_id
 
@@ -132,14 +132,25 @@ def ans_mini_batch_loader(qa_dict, region_anno_dict, ans_dict, vocab,
         end1 = time.time()
 #        print('Ranking Region: ' + str(end1-start1))
-        
+
+        question_encoding_tmp = np.zeros(shape=[1, len(vocab)])
+
         for word in question[0:-1].split():
             if word not in vocab:
                 word = 'unk'
-            question_encodings[0, vocab[word]] += 1
-
+            question_encoding_tmp[0, vocab[word]] += 1
+
+        question_len = np.sum(question_encoding_tmp)
+        # print(question[0:-1].split())
+        # print(question_len)
+        # print(question_encoding_tmp)
+        # print(vocab)
+        assert (not question_len==0)
+
+        question_encoding_tmp /= question_len
+
         for j in xrange(num_proposals):
             counter = j + (i-start_index)*num_proposals
+
             proposal = regions[j]
             start2 = time.time()
 
@@ -153,7 +164,7 @@ def ans_mini_batch_loader(qa_dict, region_anno_dict, ans_dict, vocab,
             region_score[0,counter] = proposal.score
             partition[counter] = i-start_index
 
-            question_encodings[counter,:] = question_encodings[0,:]
+            question_encodings[counter,:] = question_encoding_tmp
 
         score_start_id = (i-start_index)*num_proposals
         region_score[0, score_start_id:score_start_id+num_proposals] /= \
diff --git a/classifiers/answer_classifier/ans_data_io_helper.pyc b/classifiers/answer_classifier/ans_data_io_helper.pyc
index 9ed62f9ffe32ff02671645bc90a02302313167b1..a82756a940860691e4e9f2f7fbb6be98c2d944f8 100644
Binary files a/classifiers/answer_classifier/ans_data_io_helper.pyc and b/classifiers/answer_classifier/ans_data_io_helper.pyc differ
diff --git a/classifiers/answer_classifier/train_ans_classifier.py b/classifiers/answer_classifier/train_ans_classifier.py
index 1bf795b27b041730981e253ac09d8614aa21cf8b..a4dfddec88da3b593d79ebcb144e0cccdfa78528 100644
--- a/classifiers/answer_classifier/train_ans_classifier.py
+++ b/classifiers/answer_classifier/train_ans_classifier.py
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
 import matplotlib.image as mpimg
 import numpy as np
 import math
+import pdb
 import tensorflow as tf
 import object_classifiers.obj_data_io_helper as obj_data_loader
 import attribute_classifiers.atr_data_io_helper as atr_data_loader
@@ -81,11 +82,11 @@ def train(train_params):
     # Save region crops
     if crop_n_save_regions == True:
         qa_anno_dict_test = ans_io_helper.parse_qa_anno(test_anno_filename)
-        ans_io_helper.save_regions(image_dir, image_regions_dir, 
-                                   qa_anno_dict, region_anno_dict, 
+        ans_io_helper.save_regions(image_dir, image_regions_dir,
+                                   qa_anno_dict, region_anno_dict,
                                    1, 111351, 75, 75)
-        ans_io_helper.save_regions(image_dir, image_regions_dir, 
-                                   qa_anno_dict_test, region_anno_dict, 
+        ans_io_helper.save_regions(image_dir, image_regions_dir,
+                                   qa_anno_dict_test, region_anno_dict,
                                    111352, 160725-111352+1, 75, 75)
 
@@ -99,7 +100,7 @@ def train(train_params):
     atr_feat = tf.get_collection('atr_feat', scope='atr/conv2')
 
     # model restoration
-    obj_atr_saver = tf.train.Saver()
+#    obj_atr_saver = tf.train.Saver()
     model_to_restore = '/home/tanmay/Code/GenVQA/GenVQA/classifiers/' + \
                        'saved_models/obj_atr_classifier-1'
 #    obj_atr_saver.restore(sess, model_to_restore)
@@ -120,18 +121,26 @@ def train(train_params):
     train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
                          .minimize(cross_entropy, var_list=vars_to_opt)
 
-    print(train_step.name)
-    vars_to_restore = tf.get_collection(tf.GraphKeys.VARIABLES,scope='obj') + \
-                      tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') + \
-                      tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans/word_embed')
+
+    word_embed = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans/word_embed')
+    vars_to_restore = \
+        tf.get_collection(tf.GraphKeys.VARIABLES,scope='obj') + \
+        tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') + \
+        [word_embed[0]]
+
     all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
     vars_to_init = [var for var in all_vars if var not in vars_to_restore]
+    vars_to_save = tf.get_collection(tf.GraphKeys.VARIABLES,scope='obj') + \
+                   tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr') + \
+                   tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
+    print('vars_to_save: ')
+    print([var.name for var in vars_to_save])
 
     # Session saver
     saver = tf.train.Saver(vars_to_restore)
-
+    saver2 = tf.train.Saver(vars_to_save)
 
     if restore_intermediate_model==True:
         intermediate_model = '/home/tanmay/Code/GenVQA/Exp_Results/' + \
                              'Ans_Classifier/ans_classifier_question_only-9'
@@ -155,6 +164,10 @@ def train(train_params):
     placeholders = [image_regions, questions, keep_prob, y, region_score]
 
+    # Variables to observe
+    W_fc2 = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans/fc2/W')
+    q_feat = tf.get_collection('q_feat', scope='ans/q_embed')
+
     # Start Training
 #    batch_size = 1
     max_epoch = 10
@@ -180,25 +193,58 @@ def train(train_params):
                 y: train_ans_labels,
                 region_score: train_region_score,
             }
+
+            try:
+                assert (not np.any(np.isnan(q_feat[0].eval(feed_dict_train))))
+            except AssertionError:
+                print('NaN in q_feat')
+                print(1+i*batch_size)
+                print(train_questions)
+                print(logits.eval(feed_dict_train))
+                print(cross_entropy.eval(feed_dict_train))
+                exit(1)
+
             start1 = time.time()
-            _, current_train_batch_acc, y_avg_eval, y_pred_eval, logits_eval = \
-                sess.run([train_step, accuracy, y_avg, y_pred, logits],
+            _, current_train_batch_acc, y_avg_eval, y_pred_eval, logits_eval, W_fc2_eval = \
+                sess.run([train_step, accuracy, y_avg, y_pred, logits, W_fc2[0]],
                          feed_dict=feed_dict_train)
             end1 = time.time()
 #            print('Training Pass: ' + str(end1-start1))
 
             train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + \
                                            current_train_batch_acc
+#            pdb.set_trace()
+
+            try:
+                assert (not np.any(np.isnan(W_fc2_eval)))
+            except AssertionError:
+                print('NaN in W_fc2')
+                print(1+i*batch_size)
+                print(W_fc2_eval)
+                exit(1)
+
+            try:
+                assert (not np.any(np.isnan(logits_eval)))
+            except AssertionError:
+                print('NaN in logits')
+                print(1+i*batch_size)
+                print(y_avg_eval)
+                exit(1)
+
             try:
                 assert (not np.any(np.isnan(y_avg_eval)))
             except AssertionError:
-                print('Run NaNs coming')
+                print('NaN in y_avg')
                 print(1+i*batch_size)
+                print(logits_eval)
                 print(y_avg_eval)
                 exit(1)
 
             if (i+1)%500==0:
                 print(logits_eval[0:22,:])
                 print(train_region_score[0,0:22])
+                print(train_ans_labels[0,:])
 #                print(train_ans_labels[0,:])
                 print(y_avg_eval[0,:])
 #                print(y_pred_eval)
@@ -233,15 +279,15 @@ def train(train_params):
                                 savePath=os.path.join(outdir,
                                                       'acc_vs_epoch_q_o_atr.pdf'))
 
-        save_path = saver.save(sess, 
-                               os.path.join(outdir,'ans_classifier_question_obj_atr_only'),
-                               global_step=epoch)
+        save_path = saver2.save(sess,
+                                os.path.join(outdir,'ans_classifier_question_obj_atr'),
+                                global_step=epoch)
 
     sess.close()
     tf.reset_default_graph()
 
 if __name__=='__main__':
     train_params = {
-        'adam_lr' : 0.00001,
+        'adam_lr' : 0.0001,
     }
     train(train_params)
diff --git a/classifiers/attribute_classifiers/eval_atr_classifier.py b/classifiers/attribute_classifiers/eval_atr_classifier.py
index 643139066de0a3093df31b7496db85a29aa154e2..da1b343663ac50a438fcd0493029a1973f4c75ca 100644
--- a/classifiers/attribute_classifiers/eval_atr_classifier.py
+++ b/classifiers/attribute_classifiers/eval_atr_classifier.py
@@ -15,13 +15,20 @@ def eval(eval_params):
     x, y, keep_prob = graph_creator.placeholder_inputs()
     _ = graph_creator.obj_comp_graph(x, 1.0)
-    obj_feat = tf.get_collection('obj_feat', scope='obj/conv2')
-
-    y_pred = graph_creator.atr_comp_graph(x, keep_prob, obj_feat[0])
+    g = tf.get_default_graph()
+    obj_feat = g.get_operation_by_name('obj/conv2/obj_feat')
+    y_pred = graph_creator.atr_comp_graph(x, keep_prob, obj_feat.outputs[0])
     accuracy = graph_creator.evaluation(y, y_pred)
 
-    saver = tf.train.Saver()
-    saver.restore(sess, eval_params['model_name'] + '-' + str(eval_params['global_step']))
+    # Object model restorer
+    vars_to_restore = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj') + \
+                      tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
+    print('Variables to restore:')
+    print([var.name for var in vars_to_restore])
+
+    saver = tf.train.Saver(vars_to_restore)
+    saver.restore(sess, eval_params['model_name'] + '-' + \
+                  str(eval_params['global_step']))
 
     mean_image = np.load(os.path.join(eval_params['out_dir'], 'mean_image.npy'))
     test_json_filename = eval_params['test_json']
@@ -36,12 +43,17 @@ def eval(eval_params):
     html_dir = eval_params['html_dir']
     if not os.path.exists(html_dir):
         os.mkdir(html_dir)
-    html_writer = atr_data_loader.html_atr_table_writer(os.path.join(html_dir, 'index.html'))
+
+    html_writer = atr_data_loader \
+        .html_atr_table_writer(os.path.join(html_dir, 'index.html'))
+
     col_dict = {
         0: 'Grount Truth',
         1: 'Prediction',
         2: 'Image'}
+
     html_writer.add_element(col_dict)
+
     color_dict = {
         0: 'red', # blanks are treated as red
         1: 'green',
@@ -51,7 +63,9 @@ def eval(eval_params):
     batch_size = 100
     correct = 0
     for i in range(50):
-        test_batch = atr_data_loader.atr_mini_batch_loader(test_json_data, image_dir, mean_image, 10000+i*batch_size, batch_size, 75, 75)
+        test_batch = atr_data_loader\
+            .atr_mini_batch_loader(test_json_data, image_dir, mean_image,
+                                   10000+i*batch_size, batch_size, 75, 75)
         feed_dict_test={x: test_batch[0], y: test_batch[1], keep_prob: 1.0}
         result = sess.run([accuracy, y_pred], feed_dict=feed_dict_test)
         correct = correct + result[0]*batch_size
diff --git a/classifiers/attribute_classifiers/eval_atr_classifier.pyc b/classifiers/attribute_classifiers/eval_atr_classifier.pyc
index 960456aa4a4ea6b14e9af628b1864cfd74c89fa3..3e013ff8e315f15dbf5bdb4a5e6996969f81c984 100644
Binary files a/classifiers/attribute_classifiers/eval_atr_classifier.pyc and b/classifiers/attribute_classifiers/eval_atr_classifier.pyc differ
diff --git a/classifiers/attribute_classifiers/train_atr_classifier.py b/classifiers/attribute_classifiers/train_atr_classifier.py
index 00fc144ad3bf313dab9bbf6bc12c83ee0874a4c8..e81e170bb6a041f8b546abb5082b68ae1cc824b7 100644
--- a/classifiers/attribute_classifiers/train_atr_classifier.py
+++ b/classifiers/attribute_classifiers/train_atr_classifier.py
@@ -15,31 +15,47 @@ def train(train_params):
     x, y, keep_prob = graph_creator.placeholder_inputs()
     _ = graph_creator.obj_comp_graph(x, 1.0)
-    obj_feat = tf.get_collection('obj_feat', scope='obj/conv2')
+    g = tf.get_default_graph()
+    obj_feat = g.get_operation_by_name('obj/conv2/obj_feat')
 
-    # Session Saver
-    obj_saver = tf.train.Saver()
+    # Object model restorer
+    vars_to_restore = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
+    print('Variables to restore:')
+    print([var.name for var in vars_to_restore])
 
-    # Restore obj network parameters
-    obj_saver.restore(sess, train_params['obj_model_name'] + '-' + str(train_params['obj_global_step']))
+    obj_saver = tf.train.Saver(vars_to_restore)
+    obj_saver.restore(sess, train_params['obj_model_name'] + '-' + \
+                      str(train_params['obj_global_step']))
 
-    y_pred = graph_creator.atr_comp_graph(x, keep_prob, obj_feat[0])
+    y_pred = graph_creator.atr_comp_graph(x, keep_prob, obj_feat.outputs[0])
     cross_entropy = graph_creator.loss(y, y_pred)
+    accuracy = graph_creator.evaluation(y, y_pred)
+
+    # Collect variables to save or optimize
     vars_to_opt = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
-    vars_to_restore = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
-    train_step = tf.train.AdamOptimizer(train_params['adam_lr']).minimize(cross_entropy, var_list=vars_to_opt)
+    vars_to_save = vars_to_opt + vars_to_restore
+
+    print('Variables to optimize:')
+    print([var.name for var in vars_to_opt])
+    print('Variables to save:')
+    print([var.name for var in vars_to_save])
+
+    # Object and Attribute model saver
+    obj_atr_saver = tf.train.Saver(vars_to_save)
+
+    # Add optimization op
+    train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
+                         .minimize(cross_entropy, var_list=vars_to_opt)
+
+    # Collect variables to initialize
     all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
     vars_to_init = [var for var in all_vars if var not in vars_to_restore]
-    print('Variables that are being optimized: ' + ' '.join([var.name for var in vars_to_opt]))
-    print('Variables that will be initialized: ' + ' '.join([var.name for var in vars_to_init]))
-    accuracy = graph_creator.evaluation(y, y_pred)
+    print('Variables to initialize:')
+    print([var.name for var in vars_to_init])
 
-    # Session saver for atr variables
-    atr_saver = tf.train.Saver(vars_to_opt)
-    obj_atr_saver = tf.train.Saver(all_vars)
-
     outdir = train_params['out_dir']
     if not os.path.exists(outdir):
         os.mkdir(outdir)
@@ -59,7 +75,9 @@ def train(train_params):
     image_dir = train_params['image_dir']
     if train_params['mean_image']=='':
         print('Computing mean image')
-        mean_image = atr_data_loader.mean_image(train_json_data, image_dir, 1000, 100, img_height, img_width)
+        mean_image = atr_data_loader.mean_image(train_json_data, image_dir,
+                                                1000, 100,
+                                                img_height, img_width)
     else:
         print('Loading mean image')
         mean_image = np.load(train_params['mean_image'])
@@ -67,7 +85,11 @@ def train(train_params):
 
     # Val Data
     print('Loading validation data')
-    val_batch = atr_data_loader.atr_mini_batch_loader(train_json_data, image_dir, mean_image, 9501, 499, img_height, img_width)
+    val_batch = atr_data_loader.atr_mini_batch_loader(train_json_data,
+                                                      image_dir, mean_image,
+                                                      9501, 499,
+                                                      img_height, img_width)
+
     feed_dict_val={x: val_batch[0], y: val_batch[1], keep_prob: 1.0}
 
@@ -85,16 +107,31 @@ def train(train_params):
         for i in range(max_iter):
             if i%100==0:
                 print('Iter: ' + str(i))
-            train_batch = atr_data_loader.atr_mini_batch_loader(train_json_data, image_dir, mean_image, 1+i*batch_size, batch_size, img_height, img_width)
+                print('Val Acc: ' + str(accuracy.eval(feed_dict_val)))
+
+            train_batch = atr_data_loader \
+                .atr_mini_batch_loader(train_json_data, image_dir, mean_image,
+                                       1+i*batch_size, batch_size,
+                                       img_height, img_width)
             feed_dict_train={x: train_batch[0], y: train_batch[1], keep_prob: 0.5}
-            _, current_train_batch_acc = sess.run([train_step, accuracy], feed_dict=feed_dict_train)
-            train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + current_train_batch_acc
+
+            _, current_train_batch_acc = sess.run([train_step, accuracy],
+                                                  feed_dict=feed_dict_train)
+            train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] \
+                                           + current_train_batch_acc
 
         train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] / max_iter
         val_acc_array_epoch[epoch] = accuracy.eval(feed_dict_val)
-        plotter.plot_accuracies(xdata=np.arange(0,epoch+1)+1, ydata_train=train_acc_array_epoch[0:epoch+1], ydata_val=val_acc_array_epoch[0:epoch+1], xlim=[1, max_epoch], ylim=[0, 1.], savePath=os.path.join(outdir,'acc_vs_epoch.pdf'))
-        _ = atr_saver.save(sess, os.path.join(outdir,'atr_classifier'), global_step=epoch)
-        _ = obj_atr_saver.save(sess, os.path.join(outdir,'obj_atr_classifier'), global_step=epoch)
+
+        plotter.plot_accuracies(xdata=np.arange(0,epoch+1)+1,
+                                ydata_train=train_acc_array_epoch[0:epoch+1],
+                                ydata_val=val_acc_array_epoch[0:epoch+1],
+                                xlim=[1, max_epoch], ylim=[0, 1.],
+                                savePath=os.path.join(outdir,
+                                                      'acc_vs_epoch.pdf'))
+
+        _ = obj_atr_saver.save(sess, os.path.join(outdir,'obj_atr_classifier'),
+                               global_step=epoch)
 
     sess.close()
diff --git a/classifiers/attribute_classifiers/train_atr_classifier.pyc b/classifiers/attribute_classifiers/train_atr_classifier.pyc
index a80445734eae7f1bbe09a014ed07d0acab095d64..6635e32a225d8d0bcdc94f7d5551369481cc16e4 100644
Binary files a/classifiers/attribute_classifiers/train_atr_classifier.pyc and b/classifiers/attribute_classifiers/train_atr_classifier.pyc differ
diff --git a/classifiers/object_classifiers/#obj_data_io_helper.py# b/classifiers/object_classifiers/#obj_data_io_helper.py#
deleted file mode 100644
index 16280b4790d82098c662dc78b320d45bc80ac411..0000000000000000000000000000000000000000
--- a/classifiers/object_classifiers/#obj_data_io_helper.py#
+++ /dev/null
@@ -1,82 +0,0 @@
-import json
-import sys
-import os
-import matplotlib.pyplot as plt
-import matplotlib.image as mpimg
-import numpy as np
-import tensorflow as tf
-from scipy import misc
-
-def obj_mini_batch_loader(json_data, image_dir, mean_image, start_index, batch_size, img_height = 100, img_width = 100, channels = 3):
-
-    obj_images = np.empty(shape=[9 * batch_size, img_height / 3, img_width / 3, channels])
-    obj_labels = np.zeros(shape=[9 * batch_size, 4])
-
-    for i in range(start_index, start_index + batch_size):
-        image_name = os.path.join(image_dir, str(i) + '.jpg')
-        image = misc.imresize(mpimg.imread(image_name), (img_height, img_width), interp='nearest')
-        crop_shape = np.array([image.shape[0], image.shape[1]]) / 3
-        grid_config = json_data[i]
-
-        counter = 0
-        for grid_row in range(0, 3):
-            for grid_col in range(0, 3):
-                start_row = grid_row * crop_shape[0]
-                start_col = grid_col * crop_shape[1]
-                cropped_image = image[start_row:start_row + crop_shape[0], start_col:start_col + crop_shape[1], :]
-
-                if np.ndim(mean_image) == 0:
-                    obj_images[9 * (i - start_index) + counter, :, :, :] = cropped_image / 254.0
-                else:
-                    obj_images[9 * (i - start_index) + counter, :, :, :] = (cropped_image / 254.0) - mean_image
-
-                obj_labels[9 * (i - start_index) + counter, grid_config[6 * grid_row + 2 * grid_col]] = 1
-                counter = counter + 1
-
-    return (obj_images, obj_labels)
-
-
-def mean_image_batch(json_data, image_dir, start_index, batch_size, img_height = 100, img_width = 100, channels = 3):
-    batch = obj_mini_batch_loader(json_data, image_dir, np.empty([]), start_index, batch_size, img_height, img_width, channels)
-    mean_image = np.mean(batch[0], 0)
-    return mean_image
-
-
-def mean_image(json_data, image_dir, num_images, batch_size, img_height = 100, img_width = 100, channels = 3):
-    max_iter = np.floor(num_images / batch_size)
-    mean_image = np.zeros([img_height / 3, img_width / 3, channels])
-    for i in range(max_iter.astype(np.int16)):
-        mean_image = mean_image + mean_image_batch(json_data, image_dir, 1 + i * batch_size, batch_size, img_height, img_width, channels)
-
-    mean_image = mean_image / max_iter
-    return mean_image
-
-
-class html_obj_table_writer:
-
-    def __init__(self, filename):
-        self.filename = filename
-        self.html_file = open(self.filename, 'w')
-        self.html_file.write('<!DOCTYPE html>\n<html>\n<body>\n<table border="1" style="width:100%"> \n')
-
-    def add_element(self, col_dict):
-        self.html_file.write('    <tr>\n')
-        for key in range(len(col_dict)):
-            self.html_file.write('    <td>{}</td>\n'.format(col_dict[key]))
-
-        self.html_file.write('    </tr>\n')
-
-    def image_tag(self, image_path, height, width):
-        return '<img src="{}" alt="IMAGE NOT FOUND!" height={} width={}>'.format(image_path, height, width)
-
-    def close_file(self):
-        self.html_file.write('</table>\n</body>\n</html>')
-        self.html_file.close()
-
-
-if __name__ == '__main__':
-    html_writer = html_obj_table_writer('/home/tanmay/Code/GenVQA/Exp_Results/Shape_Classifier_v_1/trial.html')
-    col_dict = {0: 'sam',
-     1: html_writer.image_tag('something.png', 25, 25)}
-    html_writer.add_element(col_dict)
-    html_writer.close_file()
diff --git a/classifiers/object_classifiers/.#obj_data_io_helper.py b/classifiers/object_classifiers/.#obj_data_io_helper.py
deleted file mode 120000
index 8c52df0a7c196d3214ae8b463b23e2f045e3080c..0000000000000000000000000000000000000000
--- a/classifiers/object_classifiers/.#obj_data_io_helper.py
+++ /dev/null
@@ -1 +0,0 @@
-tanmay@crunchy.15752:1450461082
\ No newline at end of file
diff --git a/classifiers/object_classifiers/eval_obj_classifier.py b/classifiers/object_classifiers/eval_obj_classifier.py
index 6d209dfb2b9500fb7453d464bc86fef11290fb06..89beda0f2d869999d408277d64c7d9d93a31dcb3 100644
--- a/classifiers/object_classifiers/eval_obj_classifier.py
+++ b/classifiers/object_classifiers/eval_obj_classifier.py
@@ -16,9 +16,17 @@ def eval(eval_params):
     x, y, keep_prob = graph_creator.placeholder_inputs()
     y_pred = graph_creator.obj_comp_graph(x, keep_prob)
     accuracy = graph_creator.evaluation(y, y_pred)
-
-    saver = tf.train.Saver()
-    saver.restore(sess, eval_params['model_name'] + '-' + str(eval_params['global_step']))
+
+    # Collect variables
+    vars_to_restore = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
+    print('Variables to restore:')
+    print([var.name for var in vars_to_restore])
+
+    saver = tf.train.Saver(vars_to_restore)
+    saver.restore(sess, eval_params['model_name'] + '-' + \
+                  str(eval_params['global_step']))
+
+    print 'Loading mean image'
     mean_image = np.load(os.path.join(eval_params['out_dir'], 'mean_image.npy'))
     test_json_filename = eval_params['test_json']
     with open(test_json_filename, 'r') as json_file:
@@ -32,12 +40,17 @@ def eval(eval_params):
     html_dir = eval_params['html_dir']
     if not os.path.exists(html_dir):
         os.mkdir(html_dir)
-    html_writer = shape_data_loader.html_obj_table_writer(os.path.join(html_dir, 'index.html'))
+
+    html_writer = shape_data_loader \
+        .html_obj_table_writer(os.path.join(html_dir, 'index.html'))
+
     col_dict = {
         0: 'Grount Truth',
         1: 'Prediction',
         2: 'Image'}
+
     html_writer.add_element(col_dict)
+
     shape_dict = {
         0: 'blank',
         1: 'rectangle',
@@ -47,7 +60,8 @@ def eval(eval_params):
     batch_size = 100
     correct = 0
     for i in range(50):
-        test_batch = shape_data_loader.obj_mini_batch_loader(test_json_data, image_dir, mean_image, 10000 + i * batch_size, batch_size, 75, 75)
+        test_batch = shape_data_loader.obj_mini_batch_loader(test_json_data,
+            image_dir, mean_image, 10000 + i * batch_size, batch_size, 75, 75)
         feed_dict_test = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0}
         result = sess.run([accuracy, y_pred], feed_dict=feed_dict_test)
         correct = correct + result[0] * batch_size
@@ -57,12 +71,15 @@ def eval(eval_params):
             gt_id = np.argmax(test_batch[1][row, :])
             pred_id = np.argmax(result[1][row, :])
             if not gt_id == pred_id:
-                img_filename = os.path.join(html_dir, '{}_{}.png'.format(i, row))
-                misc.imsave(img_filename, test_batch[0][row, :, :, :] + mean_image)
+                img_filename = os.path.join(html_dir,
+                                            '{}_{}.png'.format(i, row))
+                misc.imsave(img_filename,
+                            test_batch[0][row, :, :, :] + mean_image)
                 col_dict = {
                     0: shape_dict[gt_id],
                     1: shape_dict[pred_id],
-                    2: html_writer.image_tag('{}_{}.png'.format(i, row), 25, 25)}
+                    2: html_writer.image_tag('{}_{}.png' \
+                                             .format(i, row), 25, 25)}
                 html_writer.add_element(col_dict)
 
     html_writer.close_file()
diff --git a/classifiers/object_classifiers/eval_obj_classifier.pyc b/classifiers/object_classifiers/eval_obj_classifier.pyc
index 8b6be912f6e9110be8d3644516b9293701cce808..6e3241b3601e7c0b97d1710b323fd4aef37d0f16 100644
Binary files a/classifiers/object_classifiers/eval_obj_classifier.pyc and b/classifiers/object_classifiers/eval_obj_classifier.pyc differ
diff --git a/classifiers/object_classifiers/obj_data_io_helper.pyc b/classifiers/object_classifiers/obj_data_io_helper.pyc
index ae5847b62758c1ea8aec0706d785fa461c734d1f..0351d49c5a5a5c01098023f5cf8035351f187977 100644
Binary files a/classifiers/object_classifiers/obj_data_io_helper.pyc and b/classifiers/object_classifiers/obj_data_io_helper.pyc differ
diff --git a/classifiers/object_classifiers/train_obj_classifier.py b/classifiers/object_classifiers/train_obj_classifier.py
index b3711271bc3b6a25ccfab0145724800d104eebe6..ac3f85d849351ecc893d32b89295ba21425c5f08 100644
--- a/classifiers/object_classifiers/train_obj_classifier.py
+++ b/classifiers/object_classifiers/train_obj_classifier.py
@@ -15,7 +15,6 @@ def train(train_params):
     x, y, keep_prob = graph_creator.placeholder_inputs()
     y_pred = graph_creator.obj_comp_graph(x, keep_prob)
     cross_entropy = graph_creator.loss(y, y_pred)
-    train_step = tf.train.AdamOptimizer(train_params['adam_lr']).minimize(cross_entropy)
     accuracy = graph_creator.evaluation(y, y_pred)
 
     outdir = train_params['out_dir']
@@ -37,19 +36,36 @@ def train(train_params):
     image_dir = train_params['image_dir']
     if train_params['mean_image']=='':
         print('Computing mean image')
-        mean_image = shape_data_loader.mean_image(train_json_data, image_dir, 1000, 100, img_height, img_width)
+        mean_image = shape_data_loader.mean_image(train_json_data,
+                                                  image_dir, 1000, 100,
+                                                  img_height, img_width)
     else:
         print('Loading mean image')
        mean_image = np.load(train_params['mean_image'])
     np.save(os.path.join(outdir, 'mean_image.npy'), mean_image)
 
     # Val Data
-    val_batch = shape_data_loader.obj_mini_batch_loader(train_json_data, image_dir, mean_image, 9501, 499, img_height, img_width)
+    val_batch = shape_data_loader.obj_mini_batch_loader(train_json_data,
+                                                        image_dir, mean_image,
+                                                        9501, 499,
+                                                        img_height, img_width)
     feed_dict_val = {x: val_batch[0], y: val_batch[1], keep_prob: 1.0}
 
+    # Collect variables
+    all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
+    vars_to_save = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
+
+    print('All variables:')
+    print([var.name for var in all_vars])
+    print('Variables to save:')
+    print([var.name for var in vars_to_save])
+
     # Session Saver
-    saver = tf.train.Saver()
+    saver = tf.train.Saver(vars_to_save)
 
+    # Add optimization op
+    train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
+                         .minimize(cross_entropy)
+
     # Start Training
     sess.run(tf.initialize_all_variables())
     batch_size = 10
@@ -64,15 +80,32 @@ def train(train_params):
             print('Iter: ' + str(i))
             print('Val Acc: ' + str(accuracy.eval(feed_dict_val)))
 
-        train_batch = shape_data_loader.obj_mini_batch_loader(train_json_data, image_dir, mean_image, 1 + i * batch_size, batch_size, img_height, img_width)
-        feed_dict_train = {x: train_batch[0], y: train_batch[1], keep_prob: 0.5}
-        _, current_train_batch_acc = sess.run([train_step, accuracy], feed_dict=feed_dict_train)
-        train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + current_train_batch_acc
+        train_batch = shape_data_loader \
+            .obj_mini_batch_loader(train_json_data, image_dir, mean_image,
+                                   1 + i * batch_size, batch_size,
+                                   img_height, img_width)
+        feed_dict_train = {
+            x: train_batch[0],
+            y: train_batch[1],
+            keep_prob: 0.5
+        }
+
+        _, current_train_batch_acc = sess.run([train_step, accuracy],
+                                              feed_dict=feed_dict_train)
+        train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] \
+                                       + current_train_batch_acc
 
     train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] / max_iter
     val_acc_array_epoch[epoch] = accuracy.eval(feed_dict_val)
-    plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1, ydata_train=train_acc_array_epoch[0:epoch + 1], ydata_val=val_acc_array_epoch[0:epoch + 1], xlim=[1, max_epoch], ylim=[0, 1.0], savePath=os.path.join(outdir, 'acc_vs_epoch.pdf'))
-    save_path = saver.save(sess, os.path.join(outdir, 'obj_classifier'), global_step=epoch)
+
+    plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1,
+                            ydata_train=train_acc_array_epoch[0:epoch + 1],
+                            ydata_val=val_acc_array_epoch[0:epoch + 1],
+                            xlim=[1, max_epoch], ylim=[0, 1.0],
+                            savePath=os.path.join(outdir,
+                                                  'acc_vs_epoch.pdf'))
+    save_path = saver.save(sess, os.path.join(outdir, 'obj_classifier'),
+                           global_step=epoch)
 
     sess.close()
     tf.reset_default_graph()
diff --git a/classifiers/object_classifiers/train_obj_classifier.pyc b/classifiers/object_classifiers/train_obj_classifier.pyc
index b03422c5963b73ccb531ea7d02982f1893120892..4318b88313f9f71ae7931ceba42136ffeb86287b 100644
Binary files a/classifiers/object_classifiers/train_obj_classifier.pyc and b/classifiers/object_classifiers/train_obj_classifier.pyc differ
diff --git a/classifiers/tf_graph_creation_helper.py b/classifiers/tf_graph_creation_helper.py
index be5d85448d0d3e55c750d9e8d37fed703982919a..aae6a2370ed6c8270d98c59b27b2d84b20b6d2e1 100644
--- a/classifiers/tf_graph_creation_helper.py
+++ b/classifiers/tf_graph_creation_helper.py
@@ -3,17 +3,28 @@ import math
 import tensorflow as tf
 import answer_classifier.ans_data_io_helper as ans_io_helper
 
-def weight_variable(shape, var_name = 'W', std=0.1):
-    initial = tf.truncated_normal(shape, stddev=std)
-    return tf.Variable(initial, name=var_name)
+graph_config = {
+    'num_objects': 4,
+    'num_attributes': 4,
+    'obj_feat_dim': 392,
+    'atr_feat_dim': 392,
+}
 
+def weight_variable(tensor_shape, fan_in=None, var_name='W'):
+    if fan_in==None:
+        fan_in = reduce(lambda x, y: x*y, tensor_shape[0:-1])
+    stddev = math.sqrt(2.0/fan_in)
+    print(stddev)
+    initial = tf.truncated_normal(shape=tensor_shape, mean=0.0, stddev=stddev)
+    return tf.Variable(initial_value=initial, name=var_name)
 
-def bias_variable(shape, var_name = 'b'):
-    initial = tf.constant(0.001, shape=shape)
-    return tf.Variable(initial, name=var_name)
+def bias_variable(tensor_shape, var_name='b'):
+    initial = tf.constant(value=0.0, shape=tensor_shape)
+    return tf.Variable(initial_value=initial, name=var_name)
 
-def conv2d(x, W, var_name = 'W'):
+
+def conv2d(x, W, var_name = 'conv'):
     return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME', name=var_name)
 
@@ -31,6 +42,7 @@ def placeholder_inputs(mode = 'gt'):
     if mode == 'no_gt':
         print 'No placeholder for ground truth'
         return (x, keep_prob)
+
 
 def placeholder_inputs_ans(total_vocab_size, ans_vocab_size, mode='gt'):
     image_regions = tf.placeholder(tf.float32, shape=[None,25,25,3])
@@ -49,50 +61,89 @@
 def obj_comp_graph(x, keep_prob):
     with tf.name_scope('obj') as obj_graph:
+
         with tf.name_scope('conv1') as conv1:
+
             W_conv1 = weight_variable([5,5,3,4])
             b_conv1 = bias_variable([4])
-            h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1, name='h')
+            a_conv1 = tf.add(conv2d(x, W_conv1), b_conv1, name='a')
+            h_conv1 = tf.nn.relu(a_conv1, name='h')
             h_pool1 = max_pool_2x2(h_conv1)
             h_conv1_drop = tf.nn.dropout(h_pool1, keep_prob, name='h_pool_drop')
+
         with tf.name_scope('conv2') as conv2:
+
             W_conv2 = weight_variable([3,3,4,8])
             b_conv2 = bias_variable([8])
-            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h')
+            a_conv2 = tf.add(conv2d(h_pool1, W_conv2), b_conv2, name='a')
+            h_conv2 = tf.nn.relu(a_conv2, name='h')
             h_pool2 = max_pool_2x2(h_conv2)
             h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob, name='h_pool_drop')
-            h_pool2_drop_flat = tf.reshape(h_pool2_drop, [-1, 392], name='h_pool_drop_flat')
+            h_pool2_drop_shape = h_pool2_drop.get_shape()
+            obj_feat_dim = reduce(lambda f, g: f*g,
+                                  [dim.value for dim in h_pool2_drop_shape[1:]])
+            obj_feat = tf.reshape(h_pool2_drop, [-1, obj_feat_dim],
+                                  name='obj_feat')
+
+            print('Object feature dimension: ' + str(obj_feat_dim)) #392
+
         with tf.name_scope('fc1') as fc1:
-            W_fc1 = weight_variable([392, 4])
+
+            W_fc1 = weight_variable([obj_feat_dim, graph_config['num_objects']])
             b_fc1 = bias_variable([4])
-            y_pred = tf.nn.softmax(tf.matmul(h_pool2_drop_flat, W_fc1) + b_fc1)
-        tf.add_to_collection('obj_feat', h_pool2_drop_flat)
+            logits = tf.add(tf.matmul(obj_feat, W_fc1), b_fc1, name='logits')
+
+        y_pred = tf.nn.softmax(logits, name='softmax')
+#        tf.add_to_collection('obj_feat', h_pool2_drop_flat)
         return y_pred
 
 def atr_comp_graph(x, keep_prob, obj_feat):
     with tf.name_scope('atr') as obj_graph:
+
         with tf.name_scope('conv1') as conv1:
+
             W_conv1 = weight_variable([5,5,3,4])
             b_conv1 = bias_variable([4])
-            h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1, name='h')
+            a_conv1 = tf.add(conv2d(x, W_conv1), b_conv1, name='a')
+            h_conv1 = tf.nn.relu(a_conv1, name='h')
             h_pool1 = max_pool_2x2(h_conv1)
             h_conv1_drop = tf.nn.dropout(h_pool1, keep_prob, name='h_pool_drop')
+
         with tf.name_scope('conv2') as conv2:
+
             W_conv2 = weight_variable([3,3,4,8])
             b_conv2 = bias_variable([8])
-            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h')
+            a_conv2 = tf.add(conv2d(h_pool1, W_conv2), b_conv2, name='a')
+            h_conv2 = tf.nn.relu(a_conv2, name='h')
             h_pool2 = max_pool_2x2(h_conv2)
             h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob, name='h_pool_drop')
-            h_pool2_drop_flat = tf.reshape(h_pool2_drop, [-1, 392], name='h_pool_drop_flat')
+            h_pool2_drop_shape = h_pool2_drop.get_shape()
+            atr_feat_dim = reduce(lambda f, g: f*g,
+                                  [dim.value for dim in h_pool2_drop_shape[1:]])
+            atr_feat = tf.reshape(h_pool2_drop, [-1, atr_feat_dim],
+                                  name='atr_feat')
+
+            print('Attribute feature dimension: ' + str(atr_feat_dim)) #392
+
         with tf.name_scope('fc1') as fc1:
-            W_obj_fc1 = weight_variable([392, 4], var_name='W_obj')
-            W_atr_fc1 = weight_variable([392, 4], var_name='W_atr')
+
+            W_obj_fc1 = weight_variable([graph_config['obj_feat_dim'],
+                                         graph_config['num_attributes']],
+                                        var_name='W_obj')
+            W_atr_fc1 = weight_variable([atr_feat_dim,
+                                         graph_config['num_attributes']],
+                                        var_name='W_atr')
             b_fc1 = bias_variable([4])
-            y_pred = tf.nn.softmax(tf.matmul(h_pool2_drop_flat, W_atr_fc1) + tf.matmul(obj_feat, W_obj_fc1) + b_fc1)
-        tf.add_to_collection('atr_feat', h_pool2_drop_flat)
+            logits_atr = tf.matmul(atr_feat, W_atr_fc1, name='logits_atr')
+            logits_obj = tf.matmul(obj_feat, W_obj_fc1, name='logits_obj')
+            logits = 0.5*logits_atr + 0.5*logits_obj + b_fc1
+
+        y_pred = tf.nn.softmax(logits, name='softmax')
+#        tf.add_to_collection('atr_feat', h_pool2_drop_flat)
         return y_pred
 
+
 def ans_comp_graph(image_regions, questions, keep_prob, \
                    obj_feat, atr_feat, vocab, inv_vocab, ans_vocab_size):
     with tf.name_scope('ans') as ans_graph:
@@ -104,7 +155,7 @@ def ans_comp_graph(image_regions, questions, keep_prob, \
         with tf.name_scope('q_embed') as q_embed:
             q_feat = tf.matmul(questions, word_vecs)
 #            q_feat = tf.truediv(q_feat, tf.cast(len(vocab),tf.float32))
-            q_feat = tf.truediv(q_feat, tf.reduce_sum(questions,1,keep_dims=True))
+            # q_feat = tf.truediv(q_feat, tf.reduce_sum(questions,1,keep_dims=True))
 
         with tf.name_scope('conv1') as conv1:
             W_conv1 = weight_variable([5,5,3,4])
@@ -125,9 +176,9 @@ def ans_comp_graph(image_regions, questions, keep_prob, \
             fc1_dim = 300
             W_region_fc1 = weight_variable([392, fc1_dim], var_name='W_region')
             W_obj_fc1 = weight_variable([392, fc1_dim], var_name='W_obj',
-                                        std=math.sqrt(3/(392+ans_vocab_size)))
+                                        std=math.sqrt(3.0/(2.0*392.0+50.0+ans_vocab_size)))
             W_atr_fc1 = weight_variable([392, fc1_dim], var_name='W_atr',
-                                        std=math.sqrt(3/(392+ans_vocab_size)))
+                                        std=math.sqrt(3.0/(2.0*392.0+50.0+ans_vocab_size)))
             W_q_fc1 = weight_variable([50, fc1_dim], var_name='W_q',
                                       std=math.sqrt(3.0/(50.0+ans_vocab_size)))
             b_fc1 = bias_variable([fc1_dim])
@@ -135,7 +186,6 @@ def ans_comp_graph(image_regions, questions, keep_prob, \
             h_tmp = tf.matmul(q_feat, W_q_fc1) + b_fc1 + \
                     tf.matmul(obj_feat, W_obj_fc1) + \
                     tf.matmul(atr_feat, W_atr_fc1)
-                    #tf.matmul(h_pool2_drop_flat, W_region_fc1) + \
 
             h_fc1 = tf.nn.relu(h_tmp, name='h')
 
@@ -144,6 +194,7 @@ def ans_comp_graph(image_regions, questions, keep_prob, \
         with tf.name_scope('fc2') as fc2:
             W_fc2 = weight_variable([fc1_dim, ans_vocab_size],
                                     std=math.sqrt(3.0/(fc1_dim)))
+
             b_fc2 = bias_variable([ans_vocab_size])
             logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
 
diff --git a/classifiers/tf_graph_creation_helper.pyc b/classifiers/tf_graph_creation_helper.pyc
index 4defda24febbdc5564e8f0d5b4820a16d9ee72e0..9b27a5400c59cc016fa82b56436b143641f67169 100644
Binary files a/classifiers/tf_graph_creation_helper.pyc and b/classifiers/tf_graph_creation_helper.pyc differ
diff --git a/classifiers/train_classifiers.py b/classifiers/train_classifiers.py
index 26002627000919dc268e1180da95ef1ab8e3863f..2fa365ee5660d01698d94a308ab9887318f11d68 100644
--- a/classifiers/train_classifiers.py
+++ b/classifiers/train_classifiers.py
@@ -14,16 +14,16 @@ workflow = {
     'train_obj': False,
     'eval_obj': False,
     'train_atr': False,
-    'eval_atr': False,
+    'eval_atr': True,
 }
 
 obj_classifier_train_params = {
     'out_dir': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier',
-    'adam_lr': 0.001,
+    'adam_lr': 0.0001,
     'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
     'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
-#    'mean_image': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier/mean_image.npy',
-    'mean_image': '',
+    'mean_image': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier/mean_image.npy',
+#    'mean_image': '',
 }
 
 obj_classifier_eval_params = {