Commit f49cbdac authored by tgupta6

Need to debug

parent 94a18a41
import sys
import os
import json
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
import pdb
import tensorflow as tf
import tf_graph_creation_helper as graph_creator
import plot_helper as plotter
import ans_data_io_helper as ans_io_helper
import region_ranker.perfect_ranker as region_proposer
import train_ans_classifier as ans_trainer
def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
             image_dir, mean_image, start_index, val_set_size, batch_size,
             placeholders, img_height=100, img_width=100):
    inv_ans_vocab = {v: k for k, v in ans_vocab.items()}
    pred_list = []
    max_iter = int(math.ceil(val_set_size*1.0/batch_size))
    batch_size_tmp = batch_size
    for i in xrange(max_iter):
        if i==(max_iter-1):
            batch_size_tmp = val_set_size - i*batch_size
        print('Iter: ' + str(i+1) + '/' + str(max_iter))
        region_images, ans_labels, questions, \
        region_score, partition = \
            ans_io_helper.ans_mini_batch_loader(qa_anno_dict,
                                                region_anno_dict,
                                                ans_vocab, vocab,
                                                image_dir, mean_image,
                                                start_index+i*batch_size,
                                                batch_size_tmp,
                                                img_height, img_width, 3)
        if i==max_iter-1:
            # Zero-pad the last, smaller batch up to the full batch size
            # expected by the graph's static shapes.
            residual_batch_size = batch_size - batch_size_tmp
            residual_regions = residual_batch_size*ans_io_helper.num_proposals
            residual_region_images = np.zeros(shape=[residual_regions,
                                                     img_height/3,
                                                     img_width/3, 3])
            residual_questions = np.zeros(shape=[residual_regions,
                                                 len(vocab)])
            residual_ans_labels = np.zeros(shape=[residual_batch_size,
                                                  len(ans_vocab)])
            residual_region_score = np.zeros(shape=[1, residual_regions])
            region_images = np.concatenate((region_images,
                                            residual_region_images), axis=0)
            questions = np.concatenate((questions, residual_questions),
                                       axis=0)
            ans_labels = np.concatenate((ans_labels, residual_ans_labels),
                                        axis=0)
            region_score = np.concatenate((region_score,
                                           residual_region_score), axis=1)
        feed_dict = {
            placeholders[0] : region_images,
            placeholders[1] : questions,
            placeholders[2] : 1.0,
            placeholders[3] : ans_labels,
            placeholders[4] : region_score,
        }
        ans_ids = np.argmax(y.eval(feed_dict), 1)
        for j in xrange(batch_size_tmp):
            pred_list = pred_list + [{
                'question_id' : start_index+i*batch_size+j,
                'answer' : inv_ans_vocab[ans_ids[j]]
            }]
    return pred_list
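# A minimal offline scoring sketch (editor's addition, not part of this
# commit): compares pred_list against ground-truth annotations. It assumes
# the annotation objects returned by ans_io_helper.parse_qa_anno expose an
# 'answer' attribute; treat this as a hypothetical helper.
def compute_accuracy(pred_list, qa_anno_dict):
    correct = 0
    for pred in pred_list:
        if qa_anno_dict[pred['question_id']].answer == pred['answer']:
            correct += 1
    return correct*1.0/len(pred_list)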
def eval(eval_params):
    sess = tf.InteractiveSession()
    train_anno_filename = eval_params['train_json']
    test_anno_filename = eval_params['test_json']
    regions_anno_filename = eval_params['regions_json']
    image_regions_dir = eval_params['image_regions_dir']
    outdir = eval_params['outdir']
    model = eval_params['model']
    batch_size = eval_params['batch_size']
    test_start_id = eval_params['test_start_id']
    test_set_size = eval_params['test_set_size']

    if not os.path.exists(outdir):
        os.mkdir(outdir)

    qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
    qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
    region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
    ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
    vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)

    # Create graph
    g = tf.get_default_graph()
    image_regions, questions, keep_prob, y, region_score = \
        graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab),
                                             mode='gt')
    y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
    obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
    obj_feat = obj_feat_op.outputs[0]
    y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
    atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
    atr_feat = atr_feat_op.outputs[0]
    # ans_comp_graph now returns (y_pred, logits); only y_pred is used here
    y_pred, _ = graph_creator.ans_comp_graph(image_regions, questions,
                                             keep_prob, obj_feat, atr_feat,
                                             vocab, inv_vocab,
                                             len(ans_vocab),
                                             eval_params['mode'])
    y_avg = graph_creator.aggregate_y_pred(y_pred, region_score, batch_size,
                                           ans_io_helper.num_proposals,
                                           len(ans_vocab))
    cross_entropy = graph_creator.loss(y, y_avg)
    accuracy = graph_creator.evaluation(y, y_avg)

    # Restore model
    saver = tf.train.Saver()
    if os.path.exists(model):
        saver.restore(sess, model)
    else:
        print 'Failed to read model from file ' + model

    mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
                         'Obj_Classifier/mean_image.npy')
    placeholders = [image_regions, questions, keep_prob, y, region_score]

    # Get predictions
    pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict, ans_vocab,
                         vocab, image_regions_dir, mean_image, test_start_id,
                         test_set_size, batch_size, placeholders, 75, 75)

    json_filename = os.path.join(outdir, 'predicted_ans_' + \
                                 eval_params['mode'] + '.json')
    with open(json_filename, 'w') as json_file:
        json.dump(pred_dict, json_file)
if __name__=='__main__':
    ans_classifier_eval_params = {
        'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
        'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
        'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
        'image_regions_dir': '/mnt/ramdisk/image_regions',
        'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier',
        'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier/ans_classifier_q_obj_atr-9',
        'mode': 'q_obj_atr',
        'batch_size': 20,
        'test_start_id': 111352, #+48600,
        'test_set_size': 160725-111352+1,
    }
    eval(ans_classifier_eval_params)
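# Output sketch (editor's illustration): the JSON written by eval() is a flat
# list of records, one per test question starting at test_start_id, e.g.
#   [{"question_id": 111352, "answer": "square"}, ...]
# where the answer string shown here is purely illustrative.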
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
import random
import pdb
import tensorflow as tf
import object_classifiers.obj_data_io_helper as obj_data_loader
@@ -14,18 +15,17 @@ import plot_helper as plotter
import ans_data_io_helper as ans_io_helper
import region_ranker.perfect_ranker as region_proposer
import time
val_start_id = 106115
val_set_size = 5000
val_set_size_small = 100

def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
             image_dir, mean_image, start_index, val_set_size, batch_size,
             placeholders, img_height=100, img_width=100):
    correct = 0
    max_iter = int(math.floor(val_set_size/batch_size))
    for i in xrange(max_iter):
        region_images, ans_labels, questions, \
        region_score, partition = \
@@ -53,24 +53,15 @@ def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
def train(train_params):
    sess = tf.InteractiveSession()
    train_anno_filename = train_params['train_json']
    test_anno_filename = train_params['test_json']
    regions_anno_filename = train_params['regions_json']
    image_dir = train_params['image_dir']
    image_regions_dir = train_params['image_regions_dir']
    outdir = train_params['outdir']
    obj_atr_model = train_params['obj_atr_model']
    batch_size = train_params['batch_size']
    if not os.path.exists(outdir):
        os.mkdir(outdir)
@@ -80,7 +71,7 @@ def train(train_params):
    vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)

    # Save region crops
    if train_params['crop_n_save_regions'] == True:
        qa_anno_dict_test = ans_io_helper.parse_qa_anno(test_anno_filename)
        ans_io_helper.save_regions(image_dir, image_regions_dir,
                                   qa_anno_dict, region_anno_dict,
@@ -91,24 +82,27 @@ def train(train_params):
    # Create graph
    g = tf.get_default_graph()
    image_regions, questions, keep_prob, y, region_score = \
        graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab),
                                             mode='gt')
    y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
    obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
    obj_feat = obj_feat_op.outputs[0]
    y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
    atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
    atr_feat = atr_feat_op.outputs[0]

    # Restore obj and attribute classifier parameters
    obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
    atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
    obj_atr_saver = tf.train.Saver(obj_vars+atr_vars)
    obj_atr_saver.restore(sess, obj_atr_model)

    # ans_comp_graph returns both post-softmax predictions and logits
    y_pred, logits = graph_creator.ans_comp_graph(image_regions, questions,
                                                  keep_prob, obj_feat,
                                                  atr_feat, vocab, inv_vocab,
                                                  len(ans_vocab),
                                                  train_params['mode'])
    y_avg = graph_creator.aggregate_y_pred(y_pred, region_score, batch_size,
                                           ans_io_helper.num_proposals,
                                           len(ans_vocab))
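    # Note (editor): obj_feat and atr_feat are fetched by op name above, so
    # obj_comp_graph/atr_comp_graph must create ops named exactly
    # 'obj/conv2/obj_feat' and 'atr/conv2/atr_feat'; a renamed scope fails
    # here with a KeyError at graph-construction time rather than at run time.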
@@ -117,66 +111,137 @@ def train(train_params):
    accuracy = graph_creator.evaluation(y, y_avg)

    # Collect variables
    ans_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
    list_of_vars = [
        'ans/word_embed/word_vecs',
        'ans/fc1/W_region',
        'ans/fc1/W_obj',
        'ans/fc1/W_atr',
        'ans/fc1/W_q',
        'ans/fc1/b',
        'ans/fc2/W',
        'ans/fc2/b'
    ]
    vars_dict = graph_creator.get_list_of_variables(list_of_vars)

    if train_params['mode']=='q':
        pretrained_vars_high_lr = []
        pretrained_vars_low_lr = []
        partial_model = ''
    elif train_params['mode']=='q_obj_atr' or \
         train_params['mode']=='q_reg':
        pretrained_vars_low_lr = [
            vars_dict['ans/word_embed/word_vecs'],
        ]
        pretrained_vars_high_lr = [
            vars_dict['ans/fc1/W_q'],
            vars_dict['ans/fc1/b'],
            vars_dict['ans/fc2/W'],
            vars_dict['ans/fc2/b']
        ]
        partial_model = os.path.join(outdir, 'ans_classifier_q-' + \
                                     str(train_params['start_model']))
    elif train_params['mode']=='q_obj_atr_reg':
        pretrained_vars_low_lr = [
            vars_dict['ans/word_embed/word_vecs'],
            vars_dict['ans/fc1/W_q'],
            vars_dict['ans/fc1/W_obj'],
            vars_dict['ans/fc1/W_atr'],
            vars_dict['ans/fc1/b'],
        ]
        pretrained_vars_high_lr = [
            vars_dict['ans/fc2/W'],
            vars_dict['ans/fc2/b']
        ]
        partial_model = os.path.join(outdir, 'ans_classifier_q_obj_atr-' + \
                                     str(train_params['start_model']))

    # Fine tune beginning with a previous model
    if train_params['fine_tune']==True:
        partial_model = os.path.join(outdir, 'ans_classifier_' + \
                                     train_params['mode'] + '-' + \
                                     str(train_params['start_model']))
        start_epoch = train_params['start_model']+1
    else:
        start_epoch = 0
    # Restore partial model
    vars_to_save = obj_vars + atr_vars + ans_vars
    partial_saver = tf.train.Saver(vars_to_save)
    if os.path.exists(partial_model):
        partial_saver.restore(sess, partial_model)

    # Variables to train from scratch
    all_pretrained_vars = pretrained_vars_low_lr + pretrained_vars_high_lr
    vars_to_train_from_scratch = \
        [var for var in ans_vars if var not in all_pretrained_vars]

    # Attach optimization ops
    train_step_high_lr = tf.train.AdamOptimizer(train_params['adam_high_lr']) \
                                 .minimize(cross_entropy,
                                           var_list=vars_to_train_from_scratch
                                           + pretrained_vars_high_lr)
    print('Parameters trained with high lr (' +
          str(train_params['adam_high_lr']) + '): ')
    print([var.name for var in vars_to_train_from_scratch
           + pretrained_vars_high_lr])
    if pretrained_vars_low_lr:
        train_step_low_lr = tf.train \
            .AdamOptimizer(train_params['adam_low_lr']) \
            .minimize(cross_entropy, var_list=pretrained_vars_low_lr)
        print('Parameters trained with low lr (' +
              str(train_params['adam_low_lr']) + '): ')
        print([var.name for var in pretrained_vars_low_lr])

    all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
    if train_params['fine_tune']==False:
        vars_to_init = [var for var in all_vars if var not in
                        obj_vars + atr_vars + all_pretrained_vars]
    else:
        vars_to_init = [var for var in all_vars if var not in vars_to_save]
    # Initialize vars_to_init
    sess.run(tf.initialize_variables(vars_to_init))

    print('All pretrained variables: ')
    print([var.name for var in all_pretrained_vars])
    print('Variables to train from scratch: ')
    print([var.name for var in vars_to_train_from_scratch])
    print('Variables to initialize randomly: ')
    print([var.name for var in vars_to_init])
    print('Variables to save: ')
    print([var.name for var in vars_to_save])
    # Load mean image
    mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
                         'Obj_Classifier/mean_image.npy')
    placeholders = [image_regions, questions, keep_prob, y, region_score]

    if train_params['fine_tune']==True:
        restored_accuracy = evaluate(accuracy, qa_anno_dict,
                                     region_anno_dict, ans_vocab,
                                     vocab, image_regions_dir,
                                     mean_image, val_start_id,
                                     val_set_size, batch_size,
                                     placeholders, 75, 75)
        print('Accuracy of restored model: ' + str(restored_accuracy))

    # Start Training
    max_epoch = train_params['max_epoch']
    max_iter = 5000
    val_acc_array_epoch = np.zeros([max_epoch])
    train_acc_array_epoch = np.zeros([max_epoch])
    for epoch in range(start_epoch, max_epoch):
        start = time.time()
        iter_ids = range(max_iter)
        random.shuffle(iter_ids)
        for i in iter_ids:
            train_region_images, train_ans_labels, train_questions, \
            train_region_score, train_partition = \
@@ -194,100 +259,63 @@ def train(train_params):
                region_score: train_region_score,
            }
            if pretrained_vars_low_lr:
                _, _, current_train_batch_acc, y_pred_eval, loss_eval = \
                    sess.run([train_step_low_lr, train_step_high_lr,
                              accuracy, y_pred, cross_entropy],
                             feed_dict=feed_dict_train)
            else:
                _, current_train_batch_acc, y_pred_eval, loss_eval = \
                    sess.run([train_step_high_lr, accuracy,
                              y_pred, cross_entropy],
                             feed_dict=feed_dict_train)
            assert (not np.any(np.isnan(y_pred_eval))), 'NaN predicted'

            train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + \
                                           current_train_batch_acc
            if (i+1)%500==0:
                val_accuracy = evaluate(accuracy, qa_anno_dict,
                                        region_anno_dict, ans_vocab, vocab,
                                        image_regions_dir, mean_image,
                                        val_start_id, val_set_size_small,
                                        batch_size, placeholders, 75, 75)
                end = time.time()
                print('Per Iter Time: ' + str(end-start))
                print('Iter: ' + str(i+1) + ' Val Sm Acc: ' + str(val_accuracy)
                      + ' Loss: ' + str(loss_eval))
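            # Note (editor): train_step_low_lr and train_step_high_lr are
            # independent minimize ops over disjoint var_lists, so a single
            # sess.run applies both updates from the same mini-batch, giving
            # the two variable groups different effective learning rates.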
        train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] / max_iter
        start = time.time()
        val_acc_array_epoch[epoch] = evaluate(accuracy, qa_anno_dict,
                                              region_anno_dict, ans_vocab,
                                              vocab, image_regions_dir,
                                              mean_image, val_start_id,
                                              val_set_size, batch_size,
                                              placeholders, 75, 75)
        end = time.time()
        print('Per Validation Time: ' + str(end-start))
        print('Val Acc: ' + str(val_acc_array_epoch[epoch]) +
              ' Train Acc: ' + str(train_acc_array_epoch[epoch]))

        if train_params['fine_tune']==True:
            plot_path = os.path.join(outdir, 'acc_vs_epoch_' \
                                     + train_params['mode'] + '_fine_tuned.pdf')
        else:
            plot_path = os.path.join(outdir, 'acc_vs_epoch_' \
                                     + train_params['mode'] + '.pdf')
        plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1,
                                ydata_train=train_acc_array_epoch[0:epoch + 1],
                                ydata_val=val_acc_array_epoch[0:epoch + 1],
                                xlim=[1, max_epoch], ylim=[0, 1.0],
                                savePath=plot_path)
        save_path = partial_saver \
            .save(sess, os.path.join(outdir, 'ans_classifier_' + \
                                     train_params['mode']), global_step=epoch)

    sess.close()
    tf.reset_default_graph()
if __name__=='__main__':
    train_params = {
        'adam_lr' : 0.0001,
    }
    train(train_params)
    print 'Hello'
File added
@@ -8,13 +8,29 @@ graph_config = {
    'num_attributes': 4,
    'obj_feat_dim': 392,
    'atr_feat_dim': 392,
    'region_feat_dim': 392,
    'word_vec_dim': 50,
    'ans_fc1_dim': 300,
}
def get_variable(var_scope):
    var_list = tf.get_collection(tf.GraphKeys.VARIABLES, scope=var_scope)
    assert len(var_list)==1, 'Multiple variables exist by that name'
    return var_list[0]

def get_list_of_variables(var_scope_list):
    var_dict = dict()
    for var_scope in var_scope_list:
        var_dict[var_scope] = get_variable(var_scope)
    return var_dict
def weight_variable(tensor_shape, fan_in=None, var_name='W'):
    # He initialization: stddev = sqrt(2/fan_in), with fan_in inferred from
    # all but the last dimension when not given explicitly.
    if fan_in is None:
        fan_in = reduce(lambda x, y: x*y, tensor_shape[0:-1])
    stddev = math.sqrt(2.0/fan_in)
    initial = tf.truncated_normal(shape=tensor_shape, mean=0.0, stddev=stddev)
    return tf.Variable(initial_value=initial, name=var_name)
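# Example (editor's illustration): a 5x5x3 conv filter bank has
# fan_in = 5*5*3 = 75, so weight_variable([5, 5, 3, 4]) draws from a
# truncated normal with stddev = sqrt(2/75) ~= 0.163.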
@@ -94,7 +110,7 @@ def obj_comp_graph(x, keep_prob):
        logits = tf.add(tf.matmul(obj_feat, W_fc1), b_fc1, name='logits')
        y_pred = tf.nn.softmax(logits, name='softmax')
    return y_pred
@@ -140,73 +156,110 @@ def atr_comp_graph(x, keep_prob, obj_feat):
        logits = 0.5*logits_atr + 0.5*logits_obj + b_fc1
        y_pred = tf.nn.softmax(logits, name='softmax')
    return y_pred
def ans_comp_graph(image_regions, questions, keep_prob, obj_feat, atr_feat,
                   vocab, inv_vocab, ans_vocab_size, mode):
    vocab_size = len(vocab)
    with tf.name_scope('ans') as ans_graph:
        with tf.name_scope('word_embed') as word_embed:
            word_vecs = weight_variable([vocab_size,
                                         graph_config['word_vec_dim']],
                                        var_name='word_vecs')
        with tf.name_scope('q_embed') as q_embed:
            q_feat = tf.matmul(questions, word_vecs, name='q_feat')
        with tf.name_scope('conv1') as conv1:
            W_conv1 = weight_variable([5,5,3,4])
            b_conv1 = bias_variable([4])
            a_conv1 = tf.add(conv2d(image_regions, W_conv1), b_conv1,
                             name='a')
            h_conv1 = tf.nn.relu(a_conv1, name='h')
            h_pool1 = max_pool_2x2(h_conv1)
        with tf.name_scope('conv2') as conv2:
            W_conv2 = weight_variable([3,3,4,8])
            b_conv2 = bias_variable([8])
            a_conv2 = tf.add(conv2d(h_pool1, W_conv2), b_conv2, name='a')
            h_conv2 = tf.nn.relu(a_conv2, name='h')
            h_pool2 = max_pool_2x2(h_conv2)
            h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob,
                                         name='h_pool_drop')
            h_pool2_drop_shape = h_pool2_drop.get_shape()
            region_feat_dim = reduce(lambda f, g: f*g,
                                     [dim.value for dim in
                                      h_pool2_drop_shape[1:]])
            region_feat = tf.reshape(h_pool2_drop, [-1, region_feat_dim],
                                     name='region_feat')
            print('Region feature dimension: ' + str(region_feat_dim)) #392
        with tf.name_scope('fc1') as fc1:
            fc1_dim = graph_config['ans_fc1_dim']
            W_region_fc1 = weight_variable([graph_config['region_feat_dim'],
                                            fc1_dim], var_name='W_region')
            W_obj_fc1 = weight_variable([graph_config['obj_feat_dim'],
                                         fc1_dim], var_name='W_obj')
            W_atr_fc1 = weight_variable([graph_config['atr_feat_dim'],
                                         fc1_dim], var_name='W_atr')
            W_q_fc1 = weight_variable([graph_config['word_vec_dim'],
                                       fc1_dim], var_name='W_q')
            b_fc1 = bias_variable([fc1_dim])
            a_fc1_region = tf.matmul(region_feat, W_region_fc1,
                                     name='a_fc1_region')
            a_fc1_obj = tf.matmul(obj_feat, W_obj_fc1, name='a_fc1_obj')
            a_fc1_atr = tf.matmul(atr_feat, W_atr_fc1, name='a_fc1_atr')
            a_fc1_q = tf.matmul(q_feat, W_q_fc1, name='a_fc1_q')
            # Each mode takes a uniform average over the streams it uses.
            coeff_reg = 0.0
            coeff_obj = 0.0
            coeff_atr = 0.0
            coeff_q = 0.0
            if mode=='q':
                coeff_q = 1.0
            elif mode=='q_reg':
                coeff_q = 1/2.0
                coeff_reg = 1/2.0
            elif mode=='q_obj_atr':
                coeff_q = 1/3.0
                coeff_obj = 1/3.0
                coeff_atr = 1/3.0
            elif mode=='q_obj_atr_reg':
                coeff_q = 1/4.0
                coeff_obj = 1/4.0
                coeff_atr = 1/4.0
                coeff_reg = 1/4.0
            a_fc1 = coeff_reg * a_fc1_region + \
                    coeff_obj * a_fc1_obj + \
                    coeff_atr * a_fc1_atr + \
                    coeff_q * a_fc1_q
            h_fc1 = tf.nn.relu(a_fc1, name='h')
            h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
        with tf.name_scope('fc2') as fc2:
            W_fc2 = weight_variable([fc1_dim, ans_vocab_size])
            b_fc2 = bias_variable([ans_vocab_size])
            logits = tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2,
                            name='logits')
        y_pred = tf.nn.softmax(logits, name='softmax')
        tf.add_to_collection('region_feat', region_feat)
        tf.add_to_collection('q_feat', q_feat)
    return y_pred, logits
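# Usage sketch (editor's addition): the graph now returns two tensors,
#   y_pred, logits = ans_comp_graph(image_regions, questions, keep_prob,
#                                   obj_feat, atr_feat, vocab, inv_vocab,
#                                   len(ans_vocab), 'q_obj_atr')
# so call sites that previously bound a single return value must unpack both.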
def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals,
                     ans_vocab_size):
    y_pred_list = tf.split(0, batch_size, y_pred)
    region_score_list = tf.split(1, batch_size, region_score)
    y_avg_list = []
@@ -214,17 +267,23 @@ def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals, ans_vocab_
    for i in xrange(batch_size):
        y_avg_list.append(tf.matmul(region_score_list[i], y_pred_list[i]))
    y_avg = tf.concat(0, y_avg_list)
    return y_avg
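# Shape sketch (editor's illustration): y_pred is
# [batch_size*num_proposals, ans_vocab_size] and region_score is
# [1, batch_size*num_proposals]; each question's answer distribution is a
# score-weighted sum over its proposals. numpy equivalent for one question
# with 2 proposals and 3 answers:
#   scores = np.array([[0.7, 0.3]])              # [1, 2]
#   probs  = np.array([[0.2, 0.5, 0.3],
#                      [0.6, 0.2, 0.2]])         # [2, 3]
#   y_avg  = scores.dot(probs)                   # [1, 3]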
def evaluation(y, y_pred):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1),
                                  name='correct_prediction')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='accuracy')
    return accuracy
def loss(y, y_pred):
    y_pred_clipped = tf.clip_by_value(y_pred, 1e-10, 1.0)
    cross_entropy = -tf.reduce_sum(y * tf.log(y_pred_clipped),
                                   name='cross_entropy')
    batch_size = tf.shape(y)
    return tf.truediv(cross_entropy, tf.cast(batch_size[0], tf.float32))
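# Note (editor): without the clip, a one-hot label landing on a predicted
# probability of exactly 0 yields log(0) = -inf and NaN gradients; clipping
# at 1e-10 bounds the per-example loss by -log(1e-10) ~= 23.03.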
if __name__ == '__main__':
......
No preview for this file type
@@ -9,14 +9,18 @@ import object_classifiers.train_obj_classifier as obj_trainer
import object_classifiers.eval_obj_classifier as obj_evaluator
import attribute_classifiers.train_atr_classifier as atr_trainer
import attribute_classifiers.eval_atr_classifier as atr_evaluator
import answer_classifier.train_ans_classifier as ans_trainer
workflow = {
    'train_obj': False,
    'eval_obj': False,
    'train_atr': False,
    'eval_atr': False,
    'train_ans': True,
}
ans_mode = ['q']
obj_classifier_train_params = {
    'out_dir': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier',
    'adam_lr': 0.0001,
@@ -55,6 +59,24 @@ atr_classifier_eval_params = {
    'html_dir': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/html_dir',
}
ans_classifier_train_params = {
    'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
    'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
    'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
    'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
    'image_regions_dir': '/mnt/ramdisk/image_regions',
    'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier',
    'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
    'adam_high_lr': 0.0001,
    'adam_low_lr': 0.0000, #1,
    'mode': 'q_reg',
    'crop_n_save_regions': False,
    'max_epoch': 10,
    'batch_size': 20,
    'fine_tune': False,
    'start_model': 9,
}
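# Note (editor): 'mode' selects which input streams ans_comp_graph averages;
# valid values are 'q', 'q_reg', 'q_obj_atr', and 'q_obj_atr_reg'.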
if __name__=='__main__':
    if workflow['train_obj']:
        obj_trainer.train(obj_classifier_train_params)
@@ -67,3 +89,6 @@ if __name__=='__main__':
    if workflow['eval_atr']:
        atr_evaluator.eval(atr_classifier_eval_params)
    if workflow['train_ans']:
        ans_trainer.train(ans_classifier_train_params)
@@ -7,7 +7,7 @@ if __name__== "__main__":
    res_data = json.load(f_res)
    anno_data = json.load(f_anno)
    assert(len(res_data) == len(anno_data))
    res_dict = dict()
    # convert to map with qid as key
......