Commit 28db477b authored by tgupta6

add no pretraining qa training script

parent fdf8d455
@@ -213,7 +213,6 @@ class graph_creator():
                         name = 'embedding_lookup_' + name)
                     noun_embed[name].append(embed)
             name = 'positive_adjectives'
             adjective_embed[name] = []
             for j in xrange(self.batch_size):
@@ -370,7 +369,7 @@ class graph_creator():
         self.regularization_loss = self.regularization()
-        self.total_loss = 0.1*(self.object_loss + 1000.0*self.attribute_loss) + \
+        self.total_loss = 0.0*(self.object_loss + 1000.0*self.attribute_loss) + \
             self.regularization_loss + \
             self.answer_loss
@@ -755,7 +754,7 @@ if __name__=='__main__':
     print 'Starting a session...'
     config = tf.ConfigProto()
     config.gpu_options.allow_growth = True
-    config.gpu_options.per_process_gpu_memory_fraction = 0.8
+    config.gpu_options.per_process_gpu_memory_fraction = 0.5
     sess = tf.Session(config=config, graph=graph.tf_graph)
     print 'Creating initializer...'
......
from word2vec.word_vector_management import word_vector_manager
import object_attribute_classifier_cached_features.inference as feature_graph
import region_relevance_network.inference as relevance_graph
import answer_classifier_cached_features.inference as answer_graph
from tftools import var_collect, placeholder_management
import tftools.data
import losses
import constants
import data.vqa_cached_features as vqa_data
import data.cropped_regions_cached_features as genome_data
import numpy as np
import pdb
from itertools import izip
import tensorflow as tf
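

# Builds the end-to-end question answering graph: cached ResNet region
# features are scored for objects/attributes, matched against noun/adjective
# embeddings to estimate region relevance, and combined with question and
# answer embeddings to score one positive and several negative answers.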
class graph_creator():
    def __init__(
            self,
            tb_log_dir,
            batch_size,
            image_size,
            num_neg_answers,
            space_dim,
            regularization_coeff,
            num_regions_wo_labels,
            num_regions_w_labels,
            num_object_labels,
            num_attribute_labels,
            resnet_feat_dim=2048,
            training=True):
        self.im_h, self.im_w = image_size
        self.num_neg_answers = num_neg_answers
        self.space_dim = space_dim
        self.batch_size = batch_size
        self.regularization_coeff = regularization_coeff
        self.num_regions_wo_labels = num_regions_wo_labels
        self.num_regions_w_labels = num_regions_w_labels
        self.num_object_labels = num_object_labels
        self.num_attribute_labels = num_attribute_labels
        self.resnet_feat_dim = resnet_feat_dim
        self.training = training

        self.tf_graph = tf.Graph()
        with self.tf_graph.as_default():
            self.create_placeholders()
            self.word_vec_mgr = word_vector_manager()

            # During training, unlabeled VQA region features are stacked with
            # labeled Visual Genome region features so a single forward pass
            # scores both.
            if self.training:
                self.concat_feats = tf.concat(
                    0,
                    [self.plh['region_feats'],
                     self.plh['region_feats_with_labels']])
            else:
                self.concat_feats = self.plh['region_feats']

            self.obj_atr_inference = feature_graph.ObjectAttributeInference(
                self.concat_feats,
                self.word_vec_mgr.object_label_vectors,
                self.word_vec_mgr.attribute_label_vectors,
                training)

            if self.training:
                self.split_obj_atr_inference_output()
                self.object_feat = self.object_embed_with_answers
                self.attribute_feat = self.attribute_embed_with_answers
            else:
                self.object_feat = self.obj_atr_inference.object_embed
                self.attribute_feat = self.obj_atr_inference.attribute_embed

            # Split the region embeddings into per-question groups
            self.object_feat = tf.split(
                0,
                self.batch_size,
                self.object_feat)
            self.attribute_feat = tf.split(
                0,
                self.batch_size,
                self.attribute_feat)

            self.question_embed, self.question_embed_concat = \
                self.get_question_embeddings()
            self.answers_embed, self.answers_embed_concat = \
                self.get_answer_embeddings()
            self.noun_embed, self.adjective_embed = \
                self.get_noun_adjective_embeddings()

            self.relevance_inference = \
                relevance_graph.RegionRelevanceInference(
                    self.batch_size,
                    self.object_feat,
                    self.attribute_feat,
                    self.noun_embed,
                    self.adjective_embed)

            self.answer_inference = answer_graph.AnswerInference(
                self.object_feat,
                self.attribute_feat,
                self.relevance_inference.answer_region_prob,
                self.question_embed_concat,
                self.answers_embed_concat,
                self.num_neg_answers + 1,
                self.space_dim,
                self.plh['keep_prob'])

            self.add_losses()
            self.add_accuracy_computation()
            self.collect_variables()

            self.vars_to_save = tf.all_variables()
            self.merged = tf.merge_all_summaries()
            self.writer = tf.train.SummaryWriter(
                tb_log_dir,
                graph=self.tf_graph)
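
    # Registers all graph inputs: dropout keep probability, cached region
    # features, object/attribute labels (training only), answer word indices,
    # question bins, and noun/adjective indices for each answer candidate.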
    def create_placeholders(self):
        self.plh = placeholder_management.PlaceholderManager()

        self.plh.add_placeholder(
            'keep_prob',
            tf.float32,
            shape=[])

        self.plh.add_placeholder(
            'region_feats',
            tf.float32,
            shape=[None, self.resnet_feat_dim])

        if self.training:
            self.plh.add_placeholder(
                'region_feats_with_labels',
                tf.float32,
                shape=[None, self.resnet_feat_dim])

            self.plh.add_placeholder(
                'object_labels',
                tf.float32,
                shape=[None, self.num_object_labels])

            self.plh.add_placeholder(
                'attribute_labels',
                tf.float32,
                shape=[None, self.num_attribute_labels])

        for i in xrange(self.num_neg_answers):
            answer_name = 'negative_answer_' + str(i)
            self.plh.add_placeholder(
                answer_name,
                tf.int64,
                shape=[None],
                size=self.batch_size)

        self.plh.add_placeholder(
            'positive_answer',
            tf.int64,
            shape=[None],
            size=self.batch_size)

        for i in xrange(4):
            bin_name = 'bin_' + str(i)
            self.plh.add_placeholder(
                bin_name,
                tf.int64,
                shape=[None],
                size=self.batch_size)

        self.plh.add_placeholder(
            'positive_nouns',
            tf.int64,
            shape=[None],
            size=self.batch_size)

        self.plh.add_placeholder(
            'positive_adjectives',
            tf.int64,
            shape=[None],
            size=self.batch_size)

        for i in xrange(self.num_neg_answers):
            self.plh.add_placeholder(
                'negative_nouns_' + str(i),
                tf.int64,
                shape=[None],
                size=self.batch_size)

            self.plh.add_placeholder(
                'negative_adjectives_' + str(i),
                tf.int64,
                shape=[None],
                size=self.batch_size)
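
    # Looks up word vectors for the nouns/adjectives associated with the
    # positive and negative answers of each question; these are matched
    # against region object/attribute embeddings to compute region relevance.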
    def get_noun_adjective_embeddings(self):
        with tf.variable_scope('noun_adjective_embed'):
            noun_embed = dict()
            adjective_embed = dict()

            name = 'positive_nouns'
            noun_embed[name] = []
            for j in xrange(self.batch_size):
                embed = tf.nn.embedding_lookup(
                    self.word_vec_mgr.word_vectors,
                    self.plh[name][j],
                    name='embedding_lookup_' + name)
                noun_embed[name].append(embed)

            for i in xrange(self.num_neg_answers):
                name = 'negative_nouns_' + str(i)
                noun_embed[name] = []
                for j in xrange(self.batch_size):
                    embed = tf.nn.embedding_lookup(
                        self.word_vec_mgr.word_vectors,
                        self.plh[name][j],
                        name='embedding_lookup_' + name)
                    noun_embed[name].append(embed)

            name = 'positive_adjectives'
            adjective_embed[name] = []
            for j in xrange(self.batch_size):
                embed = tf.nn.embedding_lookup(
                    self.word_vec_mgr.word_vectors,
                    self.plh[name][j],
                    name='embedding_lookup_' + name)
                adjective_embed[name].append(embed)

            for i in xrange(self.num_neg_answers):
                name = 'negative_adjectives_' + str(i)
                adjective_embed[name] = []
                for j in xrange(self.batch_size):
                    embed = tf.nn.embedding_lookup(
                        self.word_vec_mgr.word_vectors,
                        self.plh[name][j],
                        name='embedding_lookup_' + name)
                    adjective_embed[name].append(embed)

        return noun_embed, adjective_embed
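
    # Embeds each of the 4 question bins (presumably fixed question chunks
    # produced by the data layer) and concatenates the per-bin embeddings for
    # every question in the batch.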
    def get_question_embeddings(self):
        with tf.variable_scope('question_bin_embed'):
            question_bin_embed = dict()
            tensor_list = [[] for i in xrange(self.batch_size)]
            for i in xrange(4):
                bin_name = 'bin_' + str(i)
                question_bin_embed[bin_name] = []
                for j in xrange(self.batch_size):
                    embed = self.lookup_word_embeddings(
                        self.plh[bin_name][j],
                        bin_name)
                    question_bin_embed[bin_name].append(embed)
                    tensor_list[j].append(embed)

            question_bin_embed_concat = []
            for j in xrange(self.batch_size):
                embed_concat = tf.concat(
                    0,
                    tensor_list[j],
                    name='concat_question_bins')
                question_bin_embed_concat.append(embed_concat)

        return question_bin_embed, question_bin_embed_concat

    def get_answer_embeddings(self):
        with tf.variable_scope('answers_embed'):
            answers_embed = dict()
            tensor_list = [[] for i in xrange(self.batch_size)]

            answer_name = 'positive_answer'
            answers_embed[answer_name] = []
            for j in xrange(self.batch_size):
                embed = self.lookup_word_embeddings(
                    self.plh[answer_name][j],
                    answer_name)
                answers_embed[answer_name].append(embed)
                tensor_list[j].append(embed)

            for i in xrange(self.num_neg_answers):
                answer_name = 'negative_answer_' + str(i)
                answers_embed[answer_name] = []
                for j in xrange(self.batch_size):
                    embed = self.lookup_word_embeddings(
                        self.plh[answer_name][j],
                        answer_name)
                    answers_embed[answer_name].append(embed)
                    tensor_list[j].append(embed)

            answers_embed_concat = []
            for j in xrange(self.batch_size):
                embed_concat = tf.concat(
                    0,
                    tensor_list[j],
                    name='concat_answers')
                answers_embed_concat.append(embed_concat)

        return answers_embed, answers_embed_concat
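
    # Embeds a phrase as the mean of its word vectors; keep_dims=True keeps a
    # [1, embed_dim] shape.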
    def lookup_word_embeddings(self, index_list, name):
        with tf.variable_scope(name):
            word_vectors = tf.nn.embedding_lookup(
                self.word_vec_mgr.word_vectors,
                index_list,
                name='embedding_lookup')
            embedding = tf.reduce_mean(
                word_vectors,
                0,
                True,
                'reduce_mean')
        return embedding
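
    # The first num_regions_wo_labels rows of the concatenated batch are VQA
    # regions (used for answering); the remaining rows are Visual Genome
    # regions whose scores feed the supervised object/attribute losses.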
    def split_obj_atr_inference_output(self):
        with tf.variable_scope('split'):
            self.object_embed_with_answers = tf.slice(
                self.obj_atr_inference.object_embed,
                [0, 0],
                [self.num_regions_wo_labels, -1])

            self.object_scores_with_labels = tf.slice(
                self.obj_atr_inference.object_scores,
                [self.num_regions_wo_labels, 0],
                [-1, -1])

            self.attribute_embed_with_answers = tf.slice(
                self.obj_atr_inference.attribute_embed,
                [0, 0],
                [self.num_regions_wo_labels, -1])

            self.attribute_scores_with_labels = tf.slice(
                self.obj_atr_inference.attribute_scores,
                [self.num_regions_wo_labels, 0],
                [-1, -1])
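
    # Total loss: answer classification against a one-hot target with the
    # positive answer in slot 0, plus regularization. The object/attribute
    # terms are multiplied by 0.0 here, matching the "no pretraining" intent
    # of this script.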
    def add_losses(self):
        y = np.zeros([1, self.num_neg_answers + 1])
        y[0, 0] = 1.0
        y = tf.constant(y, dtype=tf.float32)

        self.answer_loss = 0
        for j in xrange(self.batch_size):
            self.answer_loss += losses.answer_loss(
                self.answer_inference.answer_score[j],
                y)
        self.answer_loss /= self.batch_size

        if self.training:
            self.object_loss = losses.object_loss(
                self.object_scores_with_labels,
                self.plh['object_labels'])
            object_loss_summary = tf.scalar_summary(
                "loss_object",
                self.object_loss)

            self.attribute_loss = losses.attribute_loss(
                self.attribute_scores_with_labels,
                self.plh['attribute_labels'],
                self.num_regions_w_labels)
            attribute_loss_summary = tf.scalar_summary(
                "loss_attribute",
                self.attribute_loss)
        else:
            self.object_loss = 0.0
            self.attribute_loss = 0.0

        self.regularization_loss = self.regularization()

        self.total_loss = 0.0*(self.object_loss + 1000.0*self.attribute_loss) + \
            self.regularization_loss + \
            self.answer_loss

        ema = tf.train.ExponentialMovingAverage(0.95, name='ema')
        update_op = ema.apply([self.answer_loss])
        moving_average_answer_loss = ema.average(self.answer_loss)

        with tf.control_dependencies([update_op]):
            answer_loss_summary = tf.scalar_summary(
                "loss_answer",
                moving_average_answer_loss)

        regularization_loss_summary = tf.scalar_summary(
            "loss_regularization",
            self.regularization_loss)

        total_loss_summary = tf.scalar_summary(
            "loss_total",
            self.total_loss)

    def regularization(self):
        vars_to_regularize = tf.get_collection('to_regularize')
        loss = losses.regularization_loss(
            vars_to_regularize,
            self.regularization_coeff)
        return loss
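
    # Accuracy counts a question as correct when the positive answer (index 0)
    # receives the highest score; an exponential moving average smooths the
    # TensorBoard curve.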
    def add_accuracy_computation(self):
        with tf.variable_scope('accuracy_graph'):
            self.answer_accuracy, self.answer_accuracy_ema, \
                self.update_answer_accuracy_op = \
                self.add_answer_accuracy_computation(
                    self.answer_inference.answer_score)
            self.moving_average_accuracy = self.answer_accuracy_ema.average(
                self.answer_accuracy)
            with tf.control_dependencies([self.update_answer_accuracy_op]):
                answer_accuracy_summary = tf.scalar_summary(
                    ["accuracy_answer"],
                    self.moving_average_accuracy)

    def add_answer_accuracy_computation(self, scores):
        with tf.variable_scope('answer_accuracy'):
            accuracy = 0.0
            for j in xrange(self.batch_size):
                is_correct = tf.equal(
                    tf.argmax(scores[j], 1),
                    tf.constant(0, dtype=tf.int64))
                accuracy += tf.cast(is_correct, tf.float32)
            accuracy /= self.batch_size
            ema = tf.train.ExponentialMovingAverage(0.95, name='ema')
            update_accuracy_op = ema.apply([accuracy])
        return accuracy, ema, update_accuracy_op

    def collect_variables(self):
        self.word_vec_vars = var_collect.collect_scope('word_vectors')
        self.resnet_vars = self.obj_atr_inference.resnet_vars
        self.object_attribute_vars = \
            var_collect.collect_scope('object_graph') + \
            var_collect.collect_scope('attribute_graph') + \
            var_collect.collect_scope('bn')
        self.answer_vars = var_collect.collect_scope('answer_graph')
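

# Unlike a pretrained setup, this initializer never reads the model path: it
# initializes every variable from scratch (no pretrained weights restored),
# consistent with the commit message.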
def create_initializer(graph, sess, model):
    class initializer():
        def __init__(self):
            with graph.tf_graph.as_default():
                all_vars = tf.all_variables()
                var_collect.print_var_list(
                    all_vars,
                    'vars_to_init')
                self.init = tf.initialize_variables(all_vars)

        def initialize(self):
            sess.run(self.init)

    return initializer()
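

# Asynchronously yields VQA training batches (questions, answers, cached
# region features) using a random index generator over the training questions.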
def create_vqa_batch_generator():
    data_mgr = vqa_data.data(
        constants.vqa_train_resnet_feat_dir,
        constants.vqa_train_anno,
        constants.vocab_json,
        constants.vqa_answer_vocab_json,
        constants.image_size,
        constants.num_region_proposals,
        constants.num_negative_answers,
        resnet_feat_dim=constants.resnet_feat_dim)

    index_generator = tftools.data.random(
        constants.answer_batch_size,
        constants.num_train_questions,
        constants.answer_num_epochs,
        constants.answer_offset)

    batch_generator = tftools.data.async_batch_generator(
        data_mgr,
        index_generator,
        constants.answer_queue_size)

    return batch_generator
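

# Asynchronously yields Visual Genome batches of labeled regions, used for
# the (here zero-weighted) object/attribute supervision.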
def create_vgenome_batch_generator():
    data_mgr = genome_data.data(
        constants.genome_resnet_feat_dir,
        constants.image_dir,
        constants.object_labels_json,
        constants.attribute_labels_json,
        constants.regions_json,
        constants.image_size,
        channels=3,
        resnet_feat_dim=constants.resnet_feat_dim,
        mean_image_filename=None)

    index_generator = tftools.data.random(
        constants.num_regions_with_labels,
        constants.region_num_samples,
        constants.region_num_epochs,
        constants.region_offset)

    batch_generator = tftools.data.async_batch_generator(
        data_mgr,
        index_generator,
        constants.region_queue_size)

    return batch_generator

def create_batch_generator():
    vqa_generator = create_vqa_batch_generator()
    vgenome_generator = create_vgenome_batch_generator()
    generator = izip(vqa_generator, vgenome_generator)
    return generator
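

# Attaches a single Adam optimizer over all trainable variables; not_to_train
# is an empty exclusion list here, and the optimizer's slot variables are
# recorded (presumably so they can be initialized or saved separately).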
class attach_optimizer():
    def __init__(self, graph, lr):
        self.graph = graph
        self.lr = lr
        with graph.tf_graph.as_default():
            all_trainable_vars = tf.trainable_variables()
            self.not_to_train = []  # + graph.object_attribute_vars
            vars_to_train = [
                var for var in all_trainable_vars
                if var not in self.not_to_train]
            var_collect.print_var_list(
                vars_to_train,
                'vars_to_train')

            all_vars = tf.all_variables()
            self.ops = dict()
            self.add_adam_optimizer(
                graph.total_loss,
                vars_to_train,
                'optimizer')
            self.train_op = self.group_all_train_ops()

            all_vars_with_opt_vars = tf.all_variables()
            self.opt_vars = [
                var for var in all_vars_with_opt_vars if var not in all_vars]

    def filter_out_vars_to_train(self, var_list):
        return [var for var in var_list if var not in self.not_to_train]

    def add_adam_optimizer(self, loss, var_list, name):
        var_list = self.filter_out_vars_to_train(var_list)
        if not var_list:
            self.ops[name] = []
            return

        train_step = tf.train.AdamOptimizer(self.lr) \
            .minimize(
                loss,
                var_list=var_list)
        self.ops[name] = train_step

    def group_all_train_ops(self):
        train_op = tf.group()
        for op in self.ops.values():
            if op:
                train_op = tf.group(train_op, op)
        return train_op
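

# Maps a paired (vqa_batch, vgenome_batch) tuple onto the graph placeholders.
# Note that the positive/negative noun and adjective lists are the question's
# nouns/adjectives concatenated with those of the corresponding answer.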
def create_feed_dict_creator(plh, num_neg_answers):
    def feed_dict_creator(batch):
        vqa_batch, vgenome_batch = batch
        batch_size = len(vqa_batch['question'])

        # Create vqa inputs
        inputs = {
            'region_feats': np.concatenate(vqa_batch['region_feats'], axis=0),
            'positive_answer': vqa_batch['positive_answer'],
        }
        for i in xrange(4):
            bin_name = 'bin_' + str(i)
            inputs[bin_name] = [
                vqa_batch['question'][j][bin_name] for j in xrange(batch_size)]
        for i in xrange(num_neg_answers):
            answer_name = 'negative_answer_' + str(i)
            inputs[answer_name] = [
                vqa_batch['negative_answers'][j][i] for j in xrange(batch_size)]
        inputs['positive_nouns'] = [
            a + b for a, b in zip(
                vqa_batch['question_nouns'],
                vqa_batch['positive_answer_nouns'])]
        inputs['positive_adjectives'] = [
            a + b for a, b in zip(
                vqa_batch['question_adjectives'],
                vqa_batch['positive_answer_adjectives'])]
        for i in xrange(num_neg_answers):
            name = 'negative_nouns_' + str(i)
            list_ith_negative_answer_nouns = [
                vqa_batch['negative_answers_nouns'][j][i]
                for j in xrange(batch_size)]
            inputs[name] = [
                a + b for a, b in zip(
                    vqa_batch['question_nouns'],
                    list_ith_negative_answer_nouns)]

            name = 'negative_adjectives_' + str(i)
            list_ith_negative_answer_adjectives = [
                vqa_batch['negative_answers_adjectives'][j][i]
                for j in xrange(batch_size)]
            inputs[name] = [
                a + b for a, b in zip(
                    vqa_batch['question_adjectives'],
                    list_ith_negative_answer_adjectives)]

        # Create vgenome inputs
        inputs['region_feats_with_labels'] = vgenome_batch['region_feats']
        inputs['object_labels'] = vgenome_batch['object_labels']
        inputs['attribute_labels'] = vgenome_batch['attribute_labels']

        inputs['keep_prob'] = 0.8

        return plh.get_feed_dict(inputs)

    return feed_dict_creator
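

# Writes TensorBoard summaries and prints tensor shapes/extrema each call as
# a quick sanity check, and checkpoints the model every log_every_n_iter
# iterations (and on the final iteration).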
class log_mgr():
    def __init__(
            self,
            graph,
            vars_to_save,
            sess,
            log_every_n_iter,
            output_dir,
            model_path):
        self.graph = graph
        self.vars_to_save = vars_to_save
        self.sess = sess
        self.log_every_n_iter = log_every_n_iter
        self.output_dir = output_dir
        self.model_path = model_path
        self.model_saver = tf.train.Saver(
            var_list=vars_to_save,
            max_to_keep=0)
        self.loss_values = dict()

    def log(self, iter, is_last=False, eval_vars_dict=None):
        if eval_vars_dict:
            self.graph.writer.add_summary(
                eval_vars_dict['merged'],
                iter)
            print 'Word Vector shape: {}'.format(
                eval_vars_dict['word_vectors'].shape)
            print np.max(eval_vars_dict['word_vectors'])
            print np.min(eval_vars_dict['word_vectors'])
            print 'Object Scores shape: {}'.format(
                eval_vars_dict['object_scores'].shape)
            print np.max(eval_vars_dict['object_scores'])
            print 'Attribute Scores shape: {}'.format(
                eval_vars_dict['attribute_scores'].shape)
            print np.max(eval_vars_dict['attribute_scores'])
            print 'Answer Scores shape: {}'.format(
                eval_vars_dict['answer_scores'].shape)
            print np.max(eval_vars_dict['answer_scores'])
            print 'Relevance Prob shape: {}'.format(
                eval_vars_dict['relevance_prob'].shape)
            print np.max(eval_vars_dict['relevance_prob'])
            print 'Per region answer prob shape: {}'.format(
                eval_vars_dict['per_region_answer_prob'].shape)
            print np.max(eval_vars_dict['per_region_answer_prob'])

        if iter % self.log_every_n_iter == 0 or is_last:
            self.model_saver.save(
                self.sess,
                self.model_path,
                global_step=iter)
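

# Main training loop: initialize variables, then run the train op and all
# monitored tensors once per batch, logging as it goes.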
def train(
        batch_generator,
        sess,
        initializer,
        vars_to_eval_dict,
        feed_dict_creator,
        logger):
    vars_to_eval_names = []
    vars_to_eval = []
    for var_name, var in vars_to_eval_dict.items():
        vars_to_eval_names += [var_name]
        vars_to_eval += [var]

    with sess.as_default():
        initializer.initialize()
        iter = 0
        for batch in batch_generator:
            print '---'
            print 'Iter: {}'.format(iter)
            feed_dict = feed_dict_creator(batch)
            eval_vars = sess.run(
                vars_to_eval,
                feed_dict=feed_dict)
            eval_vars_dict = {
                var_name: eval_var for var_name, eval_var in
                zip(vars_to_eval_names, eval_vars)}
            logger.log(iter, False, eval_vars_dict)
            iter += 1

        logger.log(iter - 1, True, eval_vars_dict)
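

# Script entry point: wires together data, graph, optimizer, session, and
# logging, then trains until the batch generator is exhausted.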
if __name__ == '__main__':
    print 'Creating batch generator...'
    batch_generator = create_batch_generator()

    print 'Creating computation graph...'
    graph = graph_creator(
        constants.tb_log_dir,
        constants.answer_batch_size,
        constants.image_size,
        constants.num_negative_answers,
        constants.answer_embedding_dim,
        constants.answer_regularization_coeff,
        constants.answer_batch_size * constants.num_region_proposals,
        constants.num_regions_with_labels,
        constants.num_object_labels,
        constants.num_attribute_labels,
        resnet_feat_dim=constants.resnet_feat_dim,
        training=True)

    print 'Attaching optimizer...'
    optimizer = attach_optimizer(
        graph,
        constants.answer_lr)

    print 'Starting a session...'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config, graph=graph.tf_graph)

    print 'Creating initializer...'
    initializer = create_initializer(
        graph,
        sess,
        constants.pretrained_model)

    print 'Creating feed dict creator...'
    feed_dict_creator = create_feed_dict_creator(
        graph.plh,
        constants.num_negative_answers)

    print 'Creating dict of vars to be evaluated...'
    vars_to_eval_dict = {
        'optimizer_op': optimizer.train_op,
        'word_vectors': graph.word_vec_mgr.word_vectors,
        'relevance_prob': graph.relevance_inference.answer_region_prob[0],
        'per_region_answer_prob': graph.answer_inference.per_region_answer_prob[0],
        'object_scores': graph.obj_atr_inference.object_scores,
        'attribute_scores': graph.obj_atr_inference.attribute_scores,
        'answer_scores': graph.answer_inference.answer_score[0],
        'accuracy': graph.moving_average_accuracy,
        'total_loss': graph.total_loss,
        'merged': graph.merged,
    }

    print 'Creating logger...'
    logger = log_mgr(
        graph,
        graph.vars_to_save,
        sess,
        constants.answer_log_every_n_iter,
        constants.answer_output_dir,
        constants.answer_model)

    print 'Start training...'
    train(
        batch_generator,
        sess,
        initializer,
        vars_to_eval_dict,
        feed_dict_creator,
        logger)
@@ -160,7 +160,7 @@ answer_offset = 0
 answer_regularization_coeff = 1e-5
 answer_queue_size = 500
 answer_embedding_dim = 600
-answer_lr = 1e-4
+answer_lr = 1e-3
 answer_log_every_n_iter = 500
 answer_output_dir = os.path.join(
     global_experiment_dir,
......