Commit 5b71bdb8 authored by tgupta6

relevance network with explicit features; answer network with margin loss and word-vector sharing

parent fe740717
......@@ -81,6 +81,17 @@ def get_vocab(qa_dict):
return vocab, inv_vocab
def join_vocab(vocab, ans_vocab):
joint_vocab = vocab.copy()
count = len(joint_vocab)
for word in ans_vocab.keys():
if word not in joint_vocab:
joint_vocab[word] = count
count += 1
return joint_vocab
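For reference, a minimal sketch of join_vocab in action: answer words absent from the question vocabulary are appended with fresh ids, which is what lets the answer network share word vectors with the question encoder (the "word-vector sharing" in the commit message). The vocabularies below are made up:

# Hypothetical vocabularies; question-word ids are preserved in the joint vocab.
vocab = {'is': 0, 'the': 1, 'red': 2}
ans_vocab = {'red': 0, 'blue': 1}
joint_vocab = join_vocab(vocab, ans_vocab)
assert joint_vocab == {'is': 0, 'the': 1, 'red': 2, 'blue': 3}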
def save_regions(image_dir, out_dir, qa_dict, region_anno_dict, start_id,
batch_size, img_width, img_height):
......@@ -91,7 +102,7 @@ def save_regions(image_dir, out_dir, qa_dict, region_anno_dict, start_id,
region_shape = np.array([img_height/3, img_width/3], np.int32)
image_done = dict()
for i in xrange(batch_size):
for i in xrange(start_id, start_id + batch_size):
image_id = qa_dict[i].image_id
image_done[image_id] = False
......@@ -228,9 +239,9 @@ atr_labels = {
}
class feed_dict_creator():
def __init__(self, region_images, ans_labels, parsed_q,
region_score, keep_prob, plholder_dict, vocab):
class FeedDictCreator():
def __init__(self, region_images, parsed_q,
keep_prob, plholder_dict, vocab):
self.plholder_dict = plholder_dict
self.parsed_q = parsed_q
self.vocab = vocab
......@@ -238,8 +249,6 @@ class feed_dict_creator():
self.feed_dict = {
plholder_dict['image_regions']: region_images,
plholder_dict['keep_prob']: keep_prob,
plholder_dict['gt_answer']: ans_labels,
plholder_dict['region_score']: region_score,
}
self.add_bin('bin0')
self.add_bin('bin1')
......@@ -281,13 +290,31 @@ class feed_dict_creator():
containment = np.zeros([num_q, num_labels], dtype='float32')
for q_num in xrange(num_q):
for i, label in labels.items():
if label in [pq.lower() for pq in self.parsed_q[q_num][bin_name]]:
if label in [pq.lower() for pq in \
self.parsed_q[q_num][bin_name]]:
containment[q_num,i] = 1
plholder = self.plholder_dict[bin_name + '_' + \
label_type + '_' + 'cont']
self.feed_dict[plholder] = containment
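A small worked example of the containment matrix built above, using a hypothetical label dict and a single parsed question (NumPy only, independent of the class):

import numpy as np
labels = {0: 'red', 1: 'circle'}            # hypothetical obj/atr label dict
parsed_q = [{'bin1': ['Red', 'square']}]    # one question; bin1 mentions 'red'
containment = np.zeros([1, len(labels)], dtype='float32')
for i, label in labels.items():
    if label in [pq.lower() for pq in parsed_q[0]['bin1']]:
        containment[0, i] = 1
# containment == [[1., 0.]]; arrays like this feed the '<bin>_obj_cont'
# and '<bin>_atr_cont' placeholders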
class RelFeedDictCreator(FeedDictCreator):
def __init__(self, region_images, parsed_q,
gt_region_scores, keep_prob, plholder_dict, vocab):
FeedDictCreator.__init__(self, region_images, parsed_q,
keep_prob, plholder_dict, vocab)
self.feed_dict[plholder_dict['gt_scores']] = gt_region_scores
class AnsFeedDictCreator(FeedDictCreator):
def __init__(self, region_images, ans_labels, parsed_q,
region_scores, keep_prob, plholder_dict, vocab):
FeedDictCreator.__init__(self, region_images, parsed_q,
keep_prob, plholder_dict, vocab)
self.feed_dict[plholder_dict['gt_answer']] = ans_labels
self.feed_dict[plholder_dict['region_score']] = region_scores
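The two subclasses only extend the shared feed dict with their ground-truth entries. A usage sketch, not runnable on its own: region_images, parsed_q, and the score/label arrays are assumed to come from ans_mini_batch_loader, and plholder_dict from the placeholder constructors in graph_creator:

# Sketch only; inputs come from the batch loaders and placeholder builders.
rel_feed = RelFeedDictCreator(region_images, parsed_q, gt_region_scores,
                              0.5, plholder_dict, vocab).feed_dict
ans_feed = AnsFeedDictCreator(region_images, ans_labels, parsed_q,
                              region_scores, 0.5, plholder_dict, vocab).feed_dict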
class html_ans_table_writer():
def __init__(self, filename):
......
......@@ -15,9 +15,9 @@ import region_ranker.perfect_ranker as region_proposer
import train_ans_classifier as ans_trainer
from PIL import Image, ImageDraw
def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
def get_pred(y, qa_anno_dict, region_anno_dict, parsed_q_dict, ans_vocab, vocab,
image_dir, mean_image, start_index, val_set_size, batch_size,
placeholders, img_height, img_width, batch_creator):
plholder_dict, img_height, img_width, batch_creator):
inv_ans_vocab = {v: k for k, v in ans_vocab.items()}
pred_list = []
......@@ -30,14 +30,14 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
print('Iter: ' + str(i+1) + '/' + str(max_iter))
region_images, ans_labels, questions, \
region_images, ans_labels, parsed_q, \
region_score, partition = batch_creator \
.ans_mini_batch_loader(qa_anno_dict,
region_anno_dict,
ans_vocab, vocab,
image_dir, mean_image,
start_index+i*batch_size,
batch_size_tmp,
batch_size_tmp, parsed_q_dict,
img_height, img_width, 3)
if i==max_iter-1:
......@@ -48,8 +48,9 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
residual_region_images = np.zeros(shape=[residual_regions,
img_height/3, img_width/3,
3])
residual_questions = np.zeros(shape=[residual_regions,
len(vocab)])
# residual_questions = np.zeros(shape=[residual_regions,
# len(vocab)])
residual_ans_labels = np.zeros(shape=[residual_batch_size,
len(ans_vocab)])
residual_region_score = np.zeros(shape=[1, residual_regions])
......@@ -57,19 +58,29 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
region_images = np.concatenate((region_images,
residual_region_images),
axis=0)
questions = np.concatenate((questions, residual_questions), axis=0)
# questions = np.concatenate((questions, residual_questions), axis=0)
for k in xrange(batch_size_tmp*22, batch_size*22):
parsed_q[k] = {
'bin0': [''],
'bin1': [''],
'bin2': [''],
'bin3': [''],
}
ans_labels = np.concatenate((ans_labels, residual_ans_labels),
axis=0)
region_score = np.concatenate((region_score, residual_region_score),
axis=1)
feed_dict = {
placeholders[0] : region_images,
placeholders[1] : questions,
placeholders[2] : 1.0,
placeholders[3] : ans_labels,
placeholders[4] : region_score,
}
feed_dict = ans_io_helper \
.AnsFeedDictCreator(region_images,
ans_labels,
parsed_q,
region_score,
1.0,
plholder_dict,
vocab).feed_dict
ans_ids = np.argmax(y.eval(feed_dict), 1)
for j in xrange(batch_size_tmp):
......@@ -78,13 +89,6 @@ def get_pred(y, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
'answer' : inv_ans_vocab[ans_ids[j]]
}]
# g = tf.get_default_graph()
# q_feat_op = g.get_operation_by_name('ans/word_embed/q_feat')
# q_feat = q_feat_op.outputs[0]
# region_feat_op = g.get_operation_by_name('ans/conv2/region_feat')
# region_feat = region_feat_op.outputs[0]
# pdb.set_trace()
return pred_list
def eval(eval_params):
......@@ -92,6 +96,7 @@ def eval(eval_params):
train_anno_filename = eval_params['train_json']
test_anno_filename = eval_params['test_json']
parsed_q_filename = eval_params['parsed_q_json']
regions_anno_filename = eval_params['regions_json']
image_regions_dir = eval_params['image_regions_dir']
outdir = eval_params['outdir']
......@@ -104,38 +109,47 @@ def eval(eval_params):
qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)
# Create graph
g = tf.get_default_graph()
image_regions, questions, keep_prob, y, region_score= \
graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab),
mode='gt')
plholder_dict = graph_creator.placeholder_inputs_ans(len(vocab),
len(ans_vocab),
mode='gt')
image_regions = plholder_dict['image_regions']
questions = plholder_dict['questions']
keep_prob = plholder_dict['keep_prob']
y = plholder_dict['gt_answer']
region_score = plholder_dict['region_score']
y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
obj_feat = obj_feat_op.outputs[0]
y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
atr_feat = atr_feat_op.outputs[0]
pred_rel_score = graph_creator.rel_comp_graph(image_regions, questions,
pred_rel_score = graph_creator.rel_comp_graph(plholder_dict,
obj_feat, atr_feat,
y_pred_obj, y_pred_atr,
'q_obj_atr_reg',
1.0, len(vocab), batch_size)
y_pred = graph_creator.ans_comp_graph(image_regions, questions, keep_prob,
obj_feat, atr_feat, vocab,
inv_vocab, len(ans_vocab),
eval_params['mode'])
y_pred = graph_creator.ans_comp_margin_graph(plholder_dict,
obj_feat, atr_feat,
y_pred_obj, y_pred_atr,
vocab, inv_vocab, ans_vocab,
eval_params['mode'])
pred_rel_score_vec = tf.reshape(pred_rel_score,
[1, batch_size*ans_io_helper.num_proposals])
y_avg = graph_creator.aggregate_y_pred(y_pred, pred_rel_score_vec,
batch_size,
ans_io_helper.num_proposals,
len(ans_vocab))
cross_entropy = graph_creator.loss(y, y_avg)
accuracy = graph_creator.evaluation(y, y_avg)
# Collect variables
......@@ -160,16 +174,15 @@ def eval(eval_params):
mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
'Obj_Classifier/mean_image.npy')
placeholders = [image_regions, questions, keep_prob, y, region_score]
# Batch creator
test_batch_creator = ans_io_helper.batch_creator(test_start_id,
test_start_id
+ test_set_size - 1)
# Get predictions
pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict, ans_vocab,
pred_dict = get_pred(y_avg, qa_anno_dict, region_anno_dict,
parsed_q_dict, ans_vocab,
vocab, image_regions_dir, mean_image, test_start_id,
test_set_size, batch_size, placeholders, 75, 75,
test_set_size, batch_size, plholder_dict, 75, 75,
test_batch_creator)
json_filename = os.path.join(outdir, 'predicted_ans_' + \
......@@ -271,11 +284,12 @@ if __name__=='__main__':
'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
'image_regions_dir': '/mnt/ramdisk/image_regions',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_w_Rel',
'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob/rel_classifier_q_obj_atr_reg-4',
'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_w_Rel/ans_classifier_' + mode + '-' + str(model_num),
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin',
'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9',
'model': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin/ans_classifier_' + mode + '-' + str(model_num),
'mode' : mode,
'batch_size': 20,
'test_start_id': 94645,
......
......@@ -31,9 +31,12 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
'ans/fc1/W_obj',
'ans/fc1/W_atr',
'ans/fc1/W_q',
'ans/fc1/W_explt',
'ans/fc1/b',
'ans/fc2/W',
'ans/fc2/b'
'ans/fc2/W_feat',
'ans/fc2/b_feat',
'ans/fc2/W_ans',
'ans/fc2/b_ans'
]
vars_dict = graph_creator.get_list_of_variables(list_of_vars)
......@@ -43,8 +46,10 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
vars_dict['ans/word_embed/word_vecs'],
vars_dict['ans/fc1/W_q'],
vars_dict['ans/fc1/b'],
vars_dict['ans/fc2/W'],
vars_dict['ans/fc2/b'],
vars_dict['ans/fc2/W_feat'],
vars_dict['ans/fc2/b_feat'],
vars_dict['ans/fc2/W_ans'],
vars_dict['ans/fc2/b_ans'],
]
reg_ans_params = [
......@@ -57,6 +62,7 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
obj_ans_params = [
vars_dict['ans/fc1/W_obj'],
vars_dict['ans/fc1/W_explt']
]
atr_ans_params = [
......@@ -88,9 +94,9 @@ def get_process_flow_vars(mode, obj_vars, atr_vars, rel_vars, fine_tune):
elif mode=='q_obj_atr_reg':
vars_to_train += reg_ans_params
if not mode=='q':
vars_to_train = [var for var in vars_to_train if \
'ans/word_embed/word_vecs' not in var.name]
# if not mode=='q':
# vars_to_train = [var for var in vars_to_train if \
# 'ans/word_embed/word_vecs' not in var.name]
# Fine-tune starting from a previous model
if fine_tune==True:
......@@ -148,9 +154,9 @@ def evaluate(accuracy, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
img_height, img_width, 3)
feed_dict = ans_io_helper.\
feed_dict_creator(region_images, ans_labels, parsed_q,
region_score, 1.0, plholder_dict,
vocab).feed_dict
AnsFeedDictCreator(region_images, ans_labels, parsed_q,
region_score, 1.0, plholder_dict,
vocab).feed_dict
correct = correct + accuracy.eval(feed_dict)
......@@ -179,6 +185,7 @@ def train(train_params):
region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
# vocab = ans_io_helper.join_vocab(vocab, ans_vocab)
# Save region crops
if train_params['crop_n_save_regions'] == True:
......@@ -209,39 +216,38 @@ def train(train_params):
atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
atr_feat = atr_feat_op.outputs[0]
# pred_rel_score = graph_creator.rel_comp_graph(image_regions, questions,
# obj_feat, atr_feat,
# 'q_obj_atr_reg', 1.0,
# len(vocab), batch_size)
pred_rel_score = graph_creator.rel_comp_graph(plholder_dict,
obj_feat, atr_feat,
y_pred_obj, y_pred_atr,
'q_obj_atr_reg_explt',
1.0, len(vocab), batch_size)
# Restore rel, obj and attribute classifier parameters
# rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
rel_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='rel')
obj_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj')
atr_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
# rel_saver = tf.train.Saver(rel_vars)
rel_saver = tf.train.Saver(rel_vars)
obj_atr_saver = tf.train.Saver(obj_vars+atr_vars)
# rel_saver.restore(sess, rel_model)
rel_saver.restore(sess, rel_model)
obj_atr_saver.restore(sess, obj_atr_model)
y_pred = graph_creator.ans_comp_graph(plholder_dict,
obj_feat, atr_feat, vocab,
inv_vocab, len(ans_vocab),
train_params['mode'])
# pred_rel_score_vec = tf.reshape(pred_rel_score,
# [1, batch_size*ans_io_helper.num_proposals])
# y_avg = graph_creator.aggregate_y_pred(y_pred,
# pred_rel_score_vec, batch_size,
# ans_io_helper.num_proposals,
# len(ans_vocab))
y_pred = graph_creator.ans_comp_margin_graph(plholder_dict,
obj_feat, atr_feat,
y_pred_obj, y_pred_atr,
vocab, inv_vocab, ans_vocab,
train_params['mode'])
pred_rel_score_vec = tf.reshape(pred_rel_score,
[1, batch_size*ans_io_helper.num_proposals])
y_avg = graph_creator.aggregate_y_pred(y_pred,
region_score, batch_size,
pred_rel_score_vec, batch_size,
ans_io_helper.num_proposals,
len(ans_vocab))
cross_entropy = graph_creator.loss(y, y_avg)
# cross_entropy = graph_creator.loss(y, y_avg)
margin_loss = graph_creator.margin_loss(y, y_avg, 0.2)
accuracy = graph_creator.evaluation(y, y_avg)
# Collect variables
......@@ -249,7 +255,7 @@ def train(train_params):
pretrained_vars, vars_to_train, vars_to_restore, vars_to_save, \
vars_to_init, vars_dict = \
get_process_flow_vars(train_params['mode'],
obj_vars, atr_vars, [], #rel_vars,
obj_vars, atr_vars, rel_vars,
train_params['fine_tune'])
# Regularizers
......@@ -265,9 +271,11 @@ def train(train_params):
vars_dict['ans/fc1/W_obj'],
vars_dict['ans/fc1/W_atr'],
vars_dict['ans/fc1/W_q'],
vars_dict['ans/fc1/W_explt'],
]
ans_fc2_params = [vars_dict['ans/fc2/W']]
ans_fc2_params = [vars_dict['ans/fc2/W_feat'],
vars_dict['ans/fc2/W_ans']]
regularizer_ans_word_vecs = graph_creator \
.regularize_params(ans_word_vec_params)
......@@ -277,7 +285,12 @@ def train(train_params):
regularizer_ans_fcs = graph_creator \
.regularize_params(ans_fc1_params + ans_fc2_params)
total_loss = cross_entropy + \
# total_loss = margin_loss + \
# 1e-5 * regularizer_ans_word_vecs + \
# 1e-5 * regularizer_ans_fcs + \
# 1e-3 * regularizer_ans_filters
total_loss = margin_loss + \
1e-5 * regularizer_ans_word_vecs + \
1e-5 * regularizer_ans_fcs + \
1e-3 * regularizer_ans_filters
......@@ -323,7 +336,7 @@ def train(train_params):
# Initialize vars_to_init
all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
optimizer_vars = [var for var in all_vars if var not in \
obj_vars + atr_vars + ans_vars] #rel_vars + ans_vars]
obj_vars + atr_vars + rel_vars + ans_vars]
print('Optimizer Variables: ')
print([var.name for var in optimizer_vars])
......@@ -334,7 +347,7 @@ def train(train_params):
mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
'Obj_Classifier/mean_image.npy')
placeholders = [image_regions, questions, keep_prob, y, region_score]
# placeholders = [image_regions, questions, keep_prob, y, region_score]
# Start Training
max_epoch = train_params['max_epoch']
......@@ -381,13 +394,13 @@ def train(train_params):
75, 75, 3)
feed_dict_train = ans_io_helper \
.feed_dict_creator(train_region_images,
train_ans_labels,
train_parsed_q,
train_region_score,
0.5,
plholder_dict,
vocab).feed_dict
.AnsFeedDictCreator(train_region_images,
train_ans_labels,
train_parsed_q,
train_region_score,
0.5,
plholder_dict,
vocab).feed_dict
_, current_train_batch_acc, y_avg_eval, loss_eval = \
......
# Minimal Python 2 check that a derived class can invoke the base-class
# constructor and an inherited method (the pattern used by the feed dict
# creator classes above).
class baseclass():
    def __init__(self, a):
        print a

    def baseMethod(self):
        print 'Yeah inheritance'


class derivedclass(baseclass):
    def __init__(self, a, b):
        baseclass.__init__(self, a)
        print b
        self.baseMethod()


a = derivedclass(1, 2)
......@@ -17,6 +17,7 @@ def eval(eval_params):
sess = tf.InteractiveSession()
train_anno_filename = eval_params['train_json']
test_anno_filename = eval_params['test_json']
parsed_q_filename = eval_params['parsed_q_json']
regions_anno_filename = eval_params['regions_json']
image_regions_dir = eval_params['image_regions_dir']
outdir = eval_params['outdir']
......@@ -33,6 +34,7 @@ def eval(eval_params):
qa_anno_dict_train = ans_io_helper.parse_qa_anno(train_anno_filename)
qa_anno_dict = ans_io_helper.parse_qa_anno(test_anno_filename)
parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict_train)
......@@ -40,24 +42,24 @@ def eval(eval_params):
# Create graph
g = tf.get_default_graph()
image_regions, questions, y, keep_prob = \
plholder_dict = \
graph_creator.placeholder_inputs_rel(ans_io_helper.num_proposals,
len(vocab), mode='gt')
placeholders = [image_regions, questions, y, keep_prob]
image_regions = plholder_dict['image_regions']
y = plholder_dict['gt_scores']
keep_prob = plholder_dict['keep_prob']
y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
obj_feat = obj_feat_op.outputs[0]
y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
atr_feat = atr_feat_op.outputs[0]
y_pred = graph_creator.rel_comp_graph(image_regions, questions,
y_pred = graph_creator.rel_comp_graph(plholder_dict,
obj_feat, atr_feat,
y_pred_obj, y_pred_atr, mode,
keep_prob, len(vocab), batch_size)
# y_pred = graph_creator.rel_comp_graph(image_regions, questions,
# obj_feat, atr_feat, mode,
# keep_prob, len(vocab), batch_size)
# Restore model
restorer = tf.train.Saver()
if os.path.exists(model):
......@@ -76,11 +78,11 @@ def eval(eval_params):
# Test Recall
test_recall = rel_trainer.evaluate(y_pred, qa_anno_dict,
region_anno_dict, ans_vocab,
vocab, image_regions_dir,
mean_image, test_start_id,
test_set_size, batch_size,
placeholders, 75, 75,
test_batch_creator,verbose=True)
region_anno_dict, parsed_q_dict,
ans_vocab, vocab,
image_regions_dir, mean_image,
test_start_id, test_set_size,
batch_size, plholder_dict,
75, 75, test_batch_creator, verbose=True)
print('Test Rec: ' + str(test_recall))
......@@ -39,34 +39,37 @@ def batch_recall(pred_scores, gt_scores, k):
return batch_recall
def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, ans_vocab, vocab,
image_dir, mean_image, start_index, val_set_size, batch_size,
placeholders, img_height, img_width, batch_creator, verbose=False):
def evaluate(region_score_pred, qa_anno_dict, region_anno_dict, parsed_q_dict,
ans_vocab, vocab, image_dir, mean_image, start_index, val_set_size,
batch_size, plholder_dict, img_height, img_width, batch_creator,
verbose=False):
recall_at_k = 0
max_iter = int(math.floor(val_set_size/batch_size))
for i in xrange(max_iter):
if verbose==True:
print('Iter: ' + str(i+1) + '/' + str(max_iter))
region_images, ans_labels, questions, \
region_score_vec, partition= batch_creator \
region_images, ans_labels, parsed_q, \
region_scores_vec, partition= batch_creator \
.ans_mini_batch_loader(qa_anno_dict, region_anno_dict,
ans_vocab, vocab, image_dir, mean_image,
start_index+i*batch_size, batch_size,
parsed_q_dict,
img_height, img_width, 3)
region_score = batch_creator.reshape_score(region_score_vec)
region_scores = batch_creator.reshape_score(region_scores_vec)
feed_dict = {
placeholders[0] : region_images,
placeholders[1] : questions,
placeholders[2] : region_score,
placeholders[3] : 1.0,
}
feed_dict = ans_io_helper \
.RelFeedDictCreator(region_images,
parsed_q,
region_scores,
1.0,
plholder_dict,
vocab).feed_dict
region_score_pred_eval = region_score_pred.eval(feed_dict)
recall_at_k += batch_recall(region_score_pred_eval,
region_score, -1)
region_scores, -1)
recall_at_k /= max_iter
......@@ -77,6 +80,7 @@ def train(train_params):
sess = tf.InteractiveSession()
train_anno_filename = train_params['train_json']
test_anno_filename = train_params['test_json']
parsed_q_filename = train_params['parsed_q_json']
regions_anno_filename = train_params['regions_json']
image_dir = train_params['image_dir']
image_regions_dir = train_params['image_regions_dir']
......@@ -89,6 +93,7 @@ def train(train_params):
os.mkdir(outdir)
qa_anno_dict = ans_io_helper.parse_qa_anno(train_anno_filename)
parsed_q_dict = ans_io_helper.read_parsed_questions(parsed_q_filename)
region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
......@@ -106,24 +111,24 @@ def train(train_params):
# Create graph
g = tf.get_default_graph()
image_regions, questions, y, keep_prob = \
plholder_dict = \
graph_creator.placeholder_inputs_rel(ans_io_helper.num_proposals,
len(vocab), mode='gt')
placeholders = [image_regions, questions, y, keep_prob]
image_regions = plholder_dict['image_regions']
y = plholder_dict['gt_scores']
keep_prob = plholder_dict['keep_prob']
y_pred_obj = graph_creator.obj_comp_graph(image_regions, 1.0)
obj_feat_op = g.get_operation_by_name('obj/conv2/obj_feat')
obj_feat = obj_feat_op.outputs[0]
y_pred_atr = graph_creator.atr_comp_graph(image_regions, 1.0, obj_feat)
atr_feat_op = g.get_operation_by_name('atr/conv2/atr_feat')
atr_feat = atr_feat_op.outputs[0]
y_pred = graph_creator.rel_comp_graph(image_regions, questions,
y_pred = graph_creator.rel_comp_graph(plholder_dict,
obj_feat, atr_feat,
y_pred_obj, y_pred_atr, mode,
keep_prob, len(vocab), batch_size)
# y_pred = graph_creator.rel_comp_graph(image_regions, questions,
# obj_feat, atr_feat, mode,
# keep_prob, len(vocab), batch_size)
accuracy = graph_creator.evaluation(y, y_pred)
cross_entropy = graph_creator.loss(y, y_pred)
......@@ -139,6 +144,7 @@ def train(train_params):
'rel/fc1/W_q',
'rel/fc1/W_obj',
'rel/fc1/W_atr',
'rel/fc1/W_explt',
'rel/fc1/b',
'rel/fc2/W',
'rel/fc2/b',
......@@ -161,6 +167,7 @@ def train(train_params):
vars_dict['rel/fc1/W_q'],
vars_dict['rel/fc1/W_obj'],
vars_dict['rel/fc1/W_atr'],
vars_dict['rel/fc1/W_explt'],
vars_dict['rel/fc2/W'],
]
......@@ -244,12 +251,12 @@ def train(train_params):
# Check accuracy of restored model
if train_params['fine_tune']==True:
restored_recall = evaluate(y_pred, qa_anno_dict,
region_anno_dict, ans_vocab,
restored_recall = evaluate(y_pred, qa_anno_dict, region_anno_dict,
parsed_q_dict, ans_vocab,
vocab, image_regions_dir,
mean_image, val_start_id,
val_set_size, batch_size,
placeholders, 75, 75,
plholder_dict, 75, 75,
val_batch_creator)
print('Recall of restored model: ' + str(restored_recall))
......@@ -261,23 +268,26 @@ def train(train_params):
train_batch_creator.shuffle_ids()
for i in range(max_iter):
train_region_images, train_ans_labels, train_questions, \
train_region_images, train_ans_labels, train_parsed_q, \
train_region_score_vec, train_partition= train_batch_creator \
.ans_mini_batch_loader(qa_anno_dict, region_anno_dict,
ans_vocab, vocab,
image_regions_dir, mean_image,
1+i*batch_size, batch_size,
1+i*batch_size, batch_size,
parsed_q_dict,
75, 75, 3)
train_region_score = train_batch_creator \
.reshape_score(train_region_score_vec)
feed_dict_train = {
image_regions : train_region_images,
questions: train_questions,
keep_prob: 0.5,
y: train_region_score,
}
feed_dict_train = ans_io_helper \
.RelFeedDictCreator(train_region_images,
train_parsed_q,
train_region_score,
0.5,
plholder_dict,
vocab).feed_dict
_, current_train_batch_acc, y_pred_eval, loss_eval = \
sess.run([train_step, accuracy, y_pred, total_loss],
feed_dict=feed_dict_train)
......@@ -289,23 +299,23 @@ def train(train_params):
train_region_score, -1)
if (i+1)%500==0:
val_recall = evaluate(y_pred, qa_anno_dict,
region_anno_dict, ans_vocab, vocab,
val_recall = evaluate(y_pred, qa_anno_dict, region_anno_dict,
parsed_q_dict, ans_vocab, vocab,
image_regions_dir, mean_image,
val_start_id, val_set_size_small,
batch_size, placeholders, 75, 75,
batch_size, plholder_dict, 75, 75,
val_small_batch_creator)
print('Iter: ' + str(i+1) + ' Val Sm Rec: ' + str(val_recall))
train_rec_array_epoch[epoch] = train_rec_array_epoch[epoch] / max_iter
val_rec_array_epoch[epoch] = evaluate(y_pred, qa_anno_dict,
region_anno_dict, ans_vocab,
vocab, image_regions_dir,
mean_image, val_start_id,
val_set_size, batch_size,
placeholders, 75, 75,
val_batch_creator)
region_anno_dict, parsed_q_dict,
ans_vocab, vocab,
image_regions_dir, mean_image,
val_start_id, val_set_size,
batch_size, plholder_dict,
75, 75, val_batch_creator)
print('Val Rec: ' + str(val_rec_array_epoch[epoch]) +
' Train Rec: ' + str(train_rec_array_epoch[epoch]))
......
......@@ -70,17 +70,33 @@ def placeholder_inputs(mode = 'gt'):
def placeholder_inputs_rel(num_proposals, total_vocab_size, mode = 'gt'):
image_regions = tf.placeholder(tf.float32, shape=[None,25,25,3])
keep_prob = tf.placeholder(tf.float32)
questions = tf.placeholder(tf.float32, shape=[None,total_vocab_size])
plholder_dict = {
'image_regions': tf.placeholder(tf.float32, [None,25,25,3],
'image_regions'),
'keep_prob': tf.placeholder(tf.float32, name='keep_prob'),
}
for i in xrange(4):
bin_name = 'bin' + str(i)
plholder_dict[bin_name + '_shape'] = \
tf.placeholder(tf.int64, [2], bin_name + '_shape')
plholder_dict[bin_name + '_indices'] = \
tf.placeholder(tf.int64, [None, 2], bin_name + '_indices')
plholder_dict[bin_name + '_values'] = \
tf.placeholder(tf.int64, [None], bin_name + '_values')
plholder_dict[bin_name + '_obj_cont'] = \
tf.placeholder(tf.float32, [None, graph_config['num_objects']],
bin_name + '_obj_cont')
plholder_dict[bin_name + '_atr_cont'] = \
tf.placeholder(tf.float32, [None, graph_config['num_attributes']],
bin_name + '_atr_cont')
if mode == 'gt':
print 'Creating placeholder for ground truth'
y = tf.placeholder(tf.float32,
shape=[None, ans_io_helper.num_proposals])
return (image_regions, questions, y, keep_prob)
plholder_dict['gt_scores'] = tf.placeholder(tf.float32,\
shape=[None, ans_io_helper.num_proposals], name = 'gt_scores')
return plholder_dict
if mode == 'no_gt':
print 'No placeholder for ground truth'
return (image_regions, questions, keep_prob)
return plholder_dict
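For orientation, a summary of the keys this function now returns (all verifiable from the construction above), with the placeholder shapes as declared:

# 'image_regions'  float32 [None, 25, 25, 3] region crops
# 'keep_prob'      float32 scalar dropout keep probability
# 'gt_scores'      float32 [None, num_proposals] (only in 'gt' mode)
# and for each bin in bin0..bin3:
#   '<bin>_shape' [2], '<bin>_indices' [None, 2], '<bin>_values' [None]
#       -- the pieces of a SparseTensor of word ids
#   '<bin>_obj_cont' [None, num_objects], '<bin>_atr_cont' [None, num_attributes]
#       -- the containment indicators filled by FeedDictCreator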
def placeholder_inputs_ans(total_vocab_size, ans_vocab_size, mode='gt'):
......@@ -204,16 +220,58 @@ def atr_comp_graph(x, keep_prob, obj_feat):
return y_pred
def rel_comp_graph(image_regions, questions, obj_feat, atr_feat,
mode, keep_prob, vocab_size, batch_size):
def q_bin_embed_graph(bin_name, word_vecs, plholder_dict):
indices = plholder_dict[bin_name + '_indices']
values = plholder_dict[bin_name + '_values']
shape = plholder_dict[bin_name + '_shape']
sp_ids = tf.SparseTensor(indices, values, shape)
return tf.nn.embedding_lookup_sparse(word_vecs, sp_ids, None,
name=bin_name + '_embedding')
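q_bin_embed_graph averages the word vectors of the words in a bin: each row of the SparseTensor is one question, and embedding_lookup_sparse with sp_weights=None falls back to its default mean combiner. A hypothetical feed for a batch of one question whose bin0 holds vocab ids 5 and 9:

# Hypothetical values for the three 'bin0_*' placeholders defined earlier.
bin0_indices = [[0, 0], [0, 1]]  # (question 0, word slots 0 and 1)
bin0_values = [5, 9]             # vocab ids of the two words in bin0
bin0_shape = [1, 2]              # 1 question, up to 2 words per bin
# The resulting 'bin0_embedding' row is the mean of word_vecs[5] and word_vecs[9].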
def explicit_feat_graph(bin_name, classifier_prob,
classifier_type, plholder_dict):
cont_plholder_name = bin_name + '_' + classifier_type + '_cont'
feat_name = 'explt_' + bin_name + '_' + classifier_type
dot_product = tf.mul(classifier_prob, plholder_dict[cont_plholder_name])
return tf.reduce_mean(dot_product, 1, keep_dims=True, name=feat_name)
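explicit_feat_graph reduces a classifier's probability vector to one scalar per row: the probabilities are masked by the bin's containment indicator and averaged, so the feature grows when the classifier believes the label the question mentions is actually present. A NumPy sketch with made-up numbers:

import numpy as np
obj_prob = np.array([[0.7, 0.2, 0.1]], dtype='float32')  # one region, 3 objects
obj_cont = np.array([[1.0, 0.0, 0.0]], dtype='float32')  # bin mentions object 0
feat = np.mean(obj_prob * obj_cont, axis=1, keepdims=True)
# feat == [[0.2333]]; larger when the mentioned object is more probable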
def rel_comp_graph(plholder_dict, obj_feat, atr_feat,
obj_prob, atr_prob, mode, keep_prob,
vocab_size, batch_size):
image_regions = plholder_dict['image_regions']
with tf.name_scope('rel') as rel_graph:
with tf.name_scope('word_embed') as q_embed:
word_vecs = weight_variable([vocab_size,
graph_config['word_vec_dim']],
var_name='word_vecs')
q_feat = tf.matmul(questions, word_vecs, name='q_feat')
bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict)
bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict)
bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict)
bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict)
q_feat = tf.concat(1, [bin0_embed,
bin1_embed,
bin2_embed,
bin3_embed], name='q_feat')
with tf.name_scope('explicit_feat') as expl_feat:
explt_feat_list = []
for bin_num in xrange(4):
bin_name = 'bin'+ str(bin_num)
explt_feat_list.append(explicit_feat_graph(bin_name, obj_prob,
'obj', plholder_dict))
explt_feat_list.append(explicit_feat_graph(bin_name, atr_prob,
'atr', plholder_dict))
concat_explt_feat = tf.concat(1, explt_feat_list,
name = 'concat_explt_feat')
concat_explt_feat_dim = concat_explt_feat.get_shape()[1].value
print('Concatenated explicit feature dimension: ' + \
str(concat_explt_feat_dim))
with tf.name_scope('conv1') as conv1:
W_conv1 = weight_variable([5,5,3,4])
......@@ -248,47 +306,57 @@ def rel_comp_graph(image_regions, questions, obj_feat, atr_feat,
print 'Atr feat dim: {}'.format(atr_feat_dim)
W_reg_fc1 = weight_variable([reg_feat_dim, fc1_dim],
var_name='W_reg')
W_q_fc1 = weight_variable([graph_config['word_vec_dim'],
W_q_fc1 = weight_variable([graph_config['q_embed_dim'],
fc1_dim], var_name='W_q')
W_obj_fc1 = weight_variable([obj_feat_dim,
fc1_dim], var_name='W_obj')
W_atr_fc1 = weight_variable([atr_feat_dim,
fc1_dim], var_name='W_atr')
W_explt_fc1 = weight_variable([concat_explt_feat_dim,
fc1_dim], var_name='W_explt')
b_fc1 = bias_variable([fc1_dim])
a_reg_fc1 = tf.matmul(reg_feat, W_reg_fc1, name='a_reg_fc1')
a_q_fc1 = tf.matmul(q_feat, W_q_fc1, name='a_q_fc1')
a_obj_fc1 = tf.matmul(obj_feat, W_obj_fc1, name='a_obj_fc1')
a_atr_fc1 = tf.matmul(atr_feat, W_atr_fc1, name='a_atr_fc1')
a_explt_fc1 = tf.matmul(concat_explt_feat, W_explt_fc1,
name='a_explt_fc1')
coeff = {
'reg': 0.0,
'q': 0.0,
'obj': 0.0,
'atr': 0.0,
'explt': 0.0,
}
if mode=='q_reg':
if mode=='q_reg_explt':
print mode
coeff['reg'] = 1/2.0
coeff['q'] = 1/2.0
coeff['reg'] = 1/3.0
coeff['q'] = 1/3.0
coeff['explt'] = 1/3.0
elif mode=='q_obj_atr':
elif mode=='q_obj_atr_explt':
print mode
coeff['q'] = 1/3.0
coeff['obj'] = 1/3.0
coeff['atr'] = 1/3.0
coeff['q'] = 0.1
coeff['obj'] = 0.1
coeff['atr'] = 0.1
coeff['explt'] = 0.7
elif mode=='q_obj_atr_reg':
elif mode=='q_obj_atr_reg_explt':
print mode
coeff['q'] = 1/4.0
coeff['obj'] = 1/4.0
coeff['atr'] = 1/4.0
coeff['reg'] = 1/4.0
coeff['q'] = 0.05
coeff['obj'] = 0.05
coeff['atr'] = 0.05
coeff['reg'] = 0.05
coeff['explt'] = 0.8
elif mode=='explt':
coeff['explt'] = 1.0
a_fc1 = coeff['reg']*a_reg_fc1 + coeff['q']*a_q_fc1 + \
coeff['obj']*a_obj_fc1 + coeff['atr']*a_atr_fc1 + \
b_fc1
coeff['explt']*a_explt_fc1 + b_fc1
h_fc1 = tf.nn.relu(a_fc1, name='h')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
......@@ -306,16 +374,7 @@ def rel_comp_graph(image_regions, questions, obj_feat, atr_feat,
y_pred = tf.nn.softmax(logits, name='softmax')
return y_pred
def q_bin_embed_graph(bin_name, word_vecs, plholder_dict):
indices = plholder_dict[bin_name + '_indices']
values = plholder_dict[bin_name + '_values']
shape = plholder_dict[bin_name + '_shape']
sp_ids = tf.SparseTensor(indices, values, shape)
return tf.nn.embedding_lookup_sparse(word_vecs, sp_ids, None,
name=bin_name + '_embedding')
def ans_comp_graph(plholder_dict, obj_feat, atr_feat,
vocab, inv_vocab, ans_vocab_size, mode):
......@@ -426,6 +485,160 @@ def ans_comp_graph(plholder_dict, obj_feat, atr_feat,
return y_pred
def ans_comp_margin_graph(plholder_dict, obj_feat, atr_feat, obj_prob, atr_prob,
vocab, inv_vocab, ans_vocab, mode):
vocab_size = len(vocab)
image_regions = plholder_dict['image_regions']
keep_prob = plholder_dict['keep_prob']
ans_vocab_size = len(ans_vocab)
inv_ans_vocab = {v:k for k, v in ans_vocab.items()}
ans_in_vocab_ids_list = []
for i in xrange(ans_vocab_size):
ans_in_vocab_ids_list.append(vocab[inv_ans_vocab[i]])
ans_in_vocab_ids_tensor = tf.constant(ans_in_vocab_ids_list, dtype=tf.int64)
with tf.name_scope('ans') as ans_graph:
with tf.name_scope('word_embed') as word_embed:
word_vecs = weight_variable([vocab_size,
graph_config['word_vec_dim']],
var_name='word_vecs')
bin0_embed = q_bin_embed_graph('bin0', word_vecs, plholder_dict)
bin1_embed = q_bin_embed_graph('bin1', word_vecs, plholder_dict)
bin2_embed = q_bin_embed_graph('bin2', word_vecs, plholder_dict)
bin3_embed = q_bin_embed_graph('bin3', word_vecs, plholder_dict)
q_feat = tf.concat(1, [bin0_embed,
bin1_embed,
bin2_embed,
bin3_embed], name='q_feat')
ans_embed = tf.nn.embedding_lookup(word_vecs, ans_in_vocab_ids_list,
name='ans_embed')
with tf.name_scope('explicit_feat') as expl_feat:
explt_feat_list = []
for bin_num in xrange(4):
bin_name = 'bin'+ str(bin_num)
explt_feat_list.append(explicit_feat_graph(bin_name, obj_prob,
'obj', plholder_dict))
explt_feat_list.append(explicit_feat_graph(bin_name, atr_prob,
'atr', plholder_dict))
concat_explt_feat = tf.concat(1, explt_feat_list,
name = 'concat_explt_feat')
concat_explt_feat_dim = concat_explt_feat.get_shape()[1].value
print('Concatenated explicit feature dimension: ' + \
str(concat_explt_feat_dim))
with tf.name_scope('conv1') as conv1:
num_filters_conv1 = 4
W_conv1 = weight_variable([5,5,3,num_filters_conv1])
b_conv1 = bias_variable([num_filters_conv1])
a_conv1 = tf.add(conv2d(image_regions, W_conv1), b_conv1, name='a')
h_conv1 = tf.nn.relu(a_conv1, name='h')
h_pool1 = max_pool_2x2(h_conv1)
h_conv1_drop = tf.nn.dropout(h_pool1, keep_prob, name='h_pool_drop')
with tf.name_scope('conv2') as conv2:
num_filters_conv2 = 8
W_conv2 = weight_variable([3,3,num_filters_conv1,num_filters_conv2])
b_conv2 = bias_variable([num_filters_conv2])
a_conv2 = tf.add(conv2d(h_pool1, W_conv2), b_conv2, name='a')
h_conv2 = tf.nn.relu(a_conv2, name='h')
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob, name='h_pool_drop')
h_pool2_drop_shape = h_pool2_drop.get_shape()
region_feat_dim = reduce(lambda f, g: f*g,
[dim.value for dim in h_pool2_drop_shape[1:]])
region_feat = tf.reshape(h_pool2_drop, [-1, region_feat_dim],
name='region_feat')
print('Region feature dimension: ' + str(region_feat_dim)) #392
with tf.name_scope('fc1') as fc1:
fc1_dim = graph_config['ans_fc1_dim']
W_region_fc1 = weight_variable([region_feat_dim,
fc1_dim], var_name='W_region')
W_obj_fc1 = weight_variable([graph_config['obj_feat_dim'],
fc1_dim], var_name='W_obj')
W_atr_fc1 = weight_variable([graph_config['atr_feat_dim'],
fc1_dim], var_name='W_atr')
W_q_fc1 = weight_variable([graph_config['q_embed_dim'],
fc1_dim], var_name='W_q')
W_explt_fc1 = weight_variable([concat_explt_feat_dim,
fc1_dim], var_name='W_explt')
b_fc1 = bias_variable([fc1_dim])
a_fc1_region = tf.matmul(region_feat, W_region_fc1,
name='a_fc1_region')
a_fc1_obj = tf.matmul(obj_feat, W_obj_fc1, name='a_fc1_obj')
a_fc1_atr = tf.matmul(atr_feat, W_atr_fc1, name='a_fc1_atr')
a_fc1_q = tf.matmul(q_feat, W_q_fc1, name='a_fc1_q')
a_explt_fc1 = tf.matmul(concat_explt_feat, W_explt_fc1,
name='a_explt_fc1')
coeff_reg = 0.0
coeff_obj = 0.0
coeff_atr = 0.0
coeff_q = 0.0
coeff_explt = 0.0
if mode=='q':
coeff_q = 1.0
elif mode=='q_reg':
coeff_q = 1/2.0
coeff_reg = 1/2.0
elif mode=='q_obj_atr':
coeff_q = 1/4.0
coeff_obj = 1/4.0
coeff_atr = 1/4.0
coeff_explt = 1/4.0
elif mode=='q_obj_atr_reg':
coeff_q = 1/5.0
coeff_obj = 1/5.0
coeff_atr = 1/5.0
coeff_reg = 1/5.0
coeff_explt = 1/5.0
# Include the explicit-feature path and the bias; both are computed and
# weighted per mode above, mirroring the fc1 sum in rel_comp_graph.
a_fc1 = coeff_reg * a_fc1_region + \
coeff_obj * a_fc1_obj + \
coeff_atr * a_fc1_atr + \
coeff_q * a_fc1_q + \
coeff_explt * a_explt_fc1 + b_fc1
h_fc1 = tf.nn.relu(a_fc1, name='h')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_drop')
with tf.name_scope('fc2') as fc2:
W_feat_fc2 = weight_variable([fc1_dim,
graph_config['word_vec_dim']],
var_name='W_feat')
b_feat_fc2 = bias_variable([graph_config['word_vec_dim']],
var_name='b_feat')
W_ans_fc2 = weight_variable([graph_config['word_vec_dim'],
graph_config['word_vec_dim']],
var_name='W_ans')
b_ans_fc2 = bias_variable([graph_config['word_vec_dim']],
var_name='b_ans')
comb_feat_embed = tf.add(tf.matmul(h_fc1_drop, W_feat_fc2),
b_feat_fc2,
name='comb_feat_embed')
comb_ans_embed = tf.add(tf.matmul(ans_embed, W_ans_fc2),
b_ans_fc2,
name='comb_ans_embed')
ans_scores = tf.matmul(comb_feat_embed, tf.transpose(comb_ans_embed),
name='ans_scores')
ans_scores = tf.nn.l2_normalize(ans_scores, 1)*3.0
return tf.nn.softmax(ans_scores)
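The margin graph scores answers by similarity in a shared embedding space: the fused features are projected to word_vec_dim by W_feat, every answer's word vector is projected by W_ans, and their dot products give one score per answer (l2-normalized and scaled by 3.0, effectively a softmax temperature). A NumPy shape sketch with hypothetical dimensions:

import numpy as np
D, A = 4, 3                          # word_vec_dim and answer count (made up)
comb_feat = np.random.rand(2, D)     # 2 region-question rows in the joint space
comb_ans = np.random.rand(A, D)      # all candidate answers in the same space
scores = comb_feat.dot(comb_ans.T)   # shape [2, A], one score per answer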
def aggregate_y_pred(y_pred, region_score, batch_size, num_proposals,
ans_vocab_size):
y_pred_list = tf.split(0, batch_size, y_pred)
......@@ -453,6 +666,12 @@ def loss(y, y_pred):
return tf.truediv(cross_entropy, tf.cast(batch_size[0],tf.float32))
def margin_loss(y, y_pred, margin):
correct_score = tf.reduce_sum(tf.mul(y, y_pred), 1,
keep_dims=True, name='correct_score')
return tf.reduce_mean(tf.maximum(0.0, y + margin - correct_score))
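A numeric check of margin_loss as committed, with made-up scores. Note the hinge uses y + margin - correct_score, i.e. it compares the one-hot labels rather than the predicted scores against the correct score; a conventional multiclass hinge would use y_pred + margin - correct_score, so this may be deliberate or an oversight. The sketch reproduces the committed form:

import numpy as np
y = np.array([[0.0, 1.0, 0.0]])         # one-hot ground truth
y_pred = np.array([[0.2, 0.5, 0.3]])    # aggregated answer scores
margin = 0.2
correct = np.sum(y * y_pred, axis=1, keepdims=True)    # 0.5
loss = np.mean(np.maximum(0.0, y + margin - correct))
# hinge terms: [0., 0.7, 0.] -> loss == 0.7 / 3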
def regularize_params(param_list):
regularizer = tf.zeros(shape=[])
for param in param_list:
......
......@@ -65,16 +65,17 @@ rel_classifier_train_params = {
'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
'image_regions_dir': '/mnt/ramdisk/image_regions',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
'mode': 'q_obj_atr',
'adam_lr' : 0.001,
'mode': 'q_obj_atr_reg_explt',
'adam_lr' : 0.0001,
'crop_n_save_regions': False,
'max_epoch': 5,
'max_epoch': 10,
'batch_size': 10,
'fine_tune': False,
'fine_tune': True,
'start_model': 4, # Used only if fine_tune is True
}
......@@ -82,12 +83,13 @@ rel_classifier_eval_params = {
'train_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/train_anno.json',
'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
'regions_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/regions_anno.json',
'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
'image_regions_dir': '/mnt/ramdisk/image_regions',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob',
'model_basedir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Prob',
'model_number': 4,
'mode': 'q_obj_atr',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
'model_basedir': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt',
'model_number': 9,
'mode': 'q_obj_atr_reg_explt',
'batch_size': 20,
'test_start_id': 94645,
'test_set_size': 143495-94645+1,
......@@ -100,15 +102,15 @@ ans_classifier_train_params = {
'parsed_q_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/parsed_questions.json',
'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',
'image_regions_dir': '/mnt/ramdisk/image_regions',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_w_Rel',
'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier/rel_classifier_q_obj_atr-4',
'outdir': '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier_Margin',
'rel_model': '/home/tanmay/Code/GenVQA/Exp_Results/Rel_Classifier_Obj_Atr_Explt/rel_classifier_q_obj_atr_reg_explt-9',
'obj_atr_model': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier-1',
'adam_lr' : 0.001,
'mode' : 'q',
'adam_lr' : 0.0001,
'mode' : 'q_obj_atr',
'crop_n_save_regions': False,
'max_epoch': 5,
'max_epoch': 10,
'batch_size': 10,
'fine_tune': False,
'fine_tune': True,
'start_model': 4, # When fine_tune is False, used to pre-initialize e.g. the q_obj_atr model from a trained q model
}
......