Commit 4d7f6f09 authored by tgupta6

object attribute classifier training, fine-tune, test setup

parent 22e54b1e
import os
import pdb
def mkdir_if_not_exists(dir_name):
if not os.path.exists(dir_name):
os.mkdir(dir_name)
experiment_name = '2'
# Global output directory (all subexperiments will be saved here)
global_output_dir = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome'
global_experiment_dir = os.path.join(
global_output_dir,
experiment_name)
tb_log_dir = os.path.join(
global_experiment_dir,
'tensorboard_logdir')
mkdir_if_not_exists(global_output_dir)
mkdir_if_not_exists(global_experiment_dir)
mkdir_if_not_exists(tb_log_dir)
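# The three calls above create, in order: the global output directory,
# the per-experiment directory (here '.../Exp_Results/VisualGenome/2'),
# and its 'tensorboard_logdir' subdirectory used for TensorBoard summaries.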
#height and width to which images are resized before feeding into networks
image_size = (224, 224)
@@ -27,17 +48,19 @@ mean_image_filename = os.path.join(
data_absolute_path,
'restructured/mean_image.jpg')
# Vocabulary
vocab_json = os.path.join(
data_absolute_path,
'restructured/vocab_subset.json')
num_object_labels = 1000
num_attribute_labels = 1000
# Regions data partition
# First 80% meant to be used for training
# Next 10% is set aside for validation
# Last 10% is to be used for testing
num_total_regions = 1951768
num_train_regions = 1561416 # First 80%
num_val_regions = 195176 # Next 10%
num_test_regions = num_total_regions \
- num_train_regions \
@@ -55,12 +78,37 @@ word2vec_binary = '/home/tanmay/Code/word2vec/word2vec-api-master/' + \
word_vector_size = 300
# Numpy matrix storing vocabulary word vectors
pretrained_vocab_word_vectors_npy = os.path.join(
data_absolute_path,
'restructured/pretrained_vocab_word_vectors.npy')
# Object Attribute Classifier Training Params
region_batch_size = 100
region_num_samples = num_total_regions
region_num_epochs = 10
region_offset = 0
region_queue_size = 400
region_regularization_coeff = 1e-4
region_lr = 1e-2
region_log_every_n_iter = 400
region_output_dir = os.path.join(
global_experiment_dir,
'object_attribute_classifiers')
mkdir_if_not_exists(region_output_dir)
region_model = os.path.join(
region_output_dir,
'model')
region_fine_tune_from_iter = 18800
region_fine_tune_from = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome/' + \
'1/object_attribute_classifiers/model-' + \
str(region_fine_tune_from_iter)
region_fine_tune_lr = 1e-2
# Object Attribute Classifier Evaluation Params
region_eval_on = 'val' # One of {'val','test'}
region_model_to_eval = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome/' + \
'2/object_attribute_classifiers/model-22000'
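As a quick consistency check on the region partition defined above, the three counts sum back to the total (a minimal standalone sketch using the constants as written):

num_total_regions = 1951768
num_train_regions = 1561416   # ~80%
num_val_regions = 195176      # ~10%
num_test_regions = num_total_regions - num_train_regions - num_val_regions
assert num_test_regions == 195176   # remaining ~10%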
import numpy as np
#import json
import ujson
import os
import pdb
import time
@@ -54,13 +55,13 @@ class data():
def read_json_file(self, filename):
print 'Reading {} ...'.format(filename)
with open(filename, 'r') as file:
return ujson.load(file)
def get(self, samples):
batch_size = len(samples)
batch = dict()
batch['region_ids'] = dict()
batch['region_images'] = np.zeros(
[batch_size, self.h, self.w, self.c], np.float32)
batch['object_labels'] = np.zeros(
[batch_size, len(self.object_labels_dict)], np.float32)
@@ -68,7 +69,7 @@ class data():
[batch_size, len(self.attribute_labels_dict)], np.float32)
for index, sample in enumerate(samples):
batch['region_ids'][index] = self.sample_to_region_dict[sample]
batch['region_images'][index, :, :, :], read_success = \
self.get_region_image(sample)
if read_success:
batch['object_labels'][index, :] = self.get_object_label(sample)
@@ -79,19 +80,22 @@ class data():
def get_single(self, sample, batch_list, worker_id):
try:
batch = dict()
batch['region_id'] = self.sample_to_region_dict[sample]
batch['region_image'], read_success = self.get_region_image(sample)
if read_success:
batch['object_label'], batch['object_label_words'] = \
self.get_object_label(sample)
batch['attribute_label'], batch['attribute_label_words'] = \
self.get_attribute_label(sample)
else:
batch['region_image'] = np.zeros(
[self.h, self.w, self.c], np.float32)
batch['object_label'] = np.zeros(
[len(self.object_labels_dict)], np.float32)
batch['attribute_label'] = np.zeros(
[len(self.attribute_labels_dict)], np.float32)
batch['object_label_words'] = []
batch['attribute_label_words'] = []
batch_list[worker_id] = batch
@@ -111,24 +115,24 @@ class data():
worker.start()
workers.append(worker)
for worker in workers:
worker.join()
batch_size = len(samples)
batch = dict()
batch['region_ids'] = dict()
batch['region_images'] = np.zeros(
[batch_size, self.h, self.w, self.c], np.float32)
batch['object_labels'] = np.zeros(
[batch_size, len(self.object_labels_dict)], np.float32)
batch['attribute_labels'] = np.zeros(
[batch_size, len(self.attribute_labels_dict)], np.float32)
for index, single_batch in enumerate(batch_list):
batch['region_ids'][index] = single_batch['region_id']
batch['region_images'][index, :, :, :] = single_batch['region_image']
batch['object_labels'][index, :] = single_batch['object_label']
batch['attribute_labels'][index, :] = single_batch['attribute_label']
return batch
@@ -142,13 +146,13 @@ class data():
read_success = True
try:
region_image = image_io.imread(filename)
except:
print 'Could not read image: Setting the image pixels to 0s'
read_success = False
region_image = np.zeros([self.h, self.w, 3], dtype=np.float32)
region_image = region_image.astype(np.float32)
return region_image, read_success
def single_to_three_channel(self, image):
if len(image.shape)==3:
@@ -171,7 +175,7 @@ class data():
def get_mean_image(self, mean_image_filename):
if mean_image_filename:
return image_io.imread(mean_image_filename).astype(
np.float32)
else:
return np.zeros([self.h, self.w, self.c], np.float32)
@@ -195,7 +199,7 @@ class data():
label_id = self.object_labels_dict[_unknown_token]
object_label_encoding[0,label_id] = 1.0
return object_label_encoding/np.sum(object_label_encoding), object_labels
def get_attribute_label(self, sample):
# Attribute is turned on if it is present
@@ -209,7 +213,7 @@ class data():
label_id = self.attribute_labels_dict[attribute]
attribute_label_encoding[0,label_id] = 1.0
return attribute_label_encoding, attribute_labels
if __name__=='__main__':
data_mgr = data(constants.image_dir,
@@ -250,6 +254,7 @@ if __name__=='__main__':
num_samples = 200
num_epochs = 1
offset = 0
queue_size = 100
index_generator = tftools.data.random(
batch_size,
@@ -260,7 +265,7 @@ if __name__=='__main__':
batch_generator = tftools.data.async_batch_generator(
data_mgr,
index_generator,
queue_size)
count = 0
for batch in batch_generator:
...
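For reference, a small standalone numpy sketch of the label encodings the data class builds above; the label ids are hypothetical, get_object_label returns a normalized multi-hot distribution over object labels and get_attribute_label a binary multi-hot vector over attributes:

import numpy as np

num_object_labels = 1000
object_label_ids = [3, 17]                       # hypothetical label ids for one region
object_encoding = np.zeros([1, num_object_labels], np.float32)
object_encoding[0, object_label_ids] = 1.0
object_encoding /= np.sum(object_encoding)       # sums to 1, as returned by get_object_label

num_attribute_labels = 1000
attribute_label_ids = [5, 42, 900]               # hypothetical attribute ids
attribute_encoding = np.zeros([1, num_attribute_labels], np.float32)
attribute_encoding[0, attribute_label_ids] = 1.0 # stays binary, as in get_attribute_label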
import tensorflow as tf
def object_loss(scores, labels):
with tf.variable_scope('object_loss'):
loss_vector = tf.nn.softmax_cross_entropy_with_logits(
scores,
labels,
name='softmax_cross_entropy_with_logits')
loss = tf.reduce_mean(
loss_vector,
name='average_loss')
return loss
def attribute_loss(scores, labels):
with tf.variable_scope('attribute_loss'):
loss_matrix = tf.nn.sigmoid_cross_entropy_with_logits(
scores,
labels,
name='sigmoid_cross_entropy_with_logits')
# label_count = tf.reduce_mean(
# labels,
# 0,
# keep_dims=True,
# name='label_count')
# label_count = tf.truediv(
# label_count,
# tf.to_float(label_count.get_shape().as_list()[0]),
# name='normalized_label_count')
loss = tf.reduce_mean(
loss_matrix,
# tf.matmul(loss_matrix, tf.transpose(label_count)),
name='average_loss')
return loss
def regularization_loss(param_list, coeff):
regularizer = tf.zeros(shape=[])
for param in param_list:
regularizer += tf.nn.l2_loss(param)
return coeff*regularizer
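A minimal usage sketch of the three loss functions above, assuming the TensorFlow 0.x positional signatures used in this file; the 1000-dimensional placeholders and the 1e-4 coefficient mirror the constants defined earlier:

import tensorflow as tf

scores = tf.placeholder(tf.float32, [None, 1000])
labels = tf.placeholder(tf.float32, [None, 1000])

obj_loss = object_loss(scores, labels)        # softmax cross-entropy, averaged over the batch
atr_loss = attribute_loss(scores, labels)     # sigmoid cross-entropy, averaged over batch and labels
reg_loss = regularization_loss(tf.trainable_variables(), 1e-4)
total_loss = obj_loss + atr_loss + reg_loss   # combined as in the training scripts below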
import pdb
import os
import ujson
import numpy as np
import data.cropped_regions as cropped_regions
import tftools.data
from tftools import var_collect, placeholder_management
from object_attribute_classifier import inference
from word2vec.word_vector_management import word_vector_manager
import losses
import constants
import tensorflow as tf
eval_on = constants.region_eval_on
batch_size = constants.region_batch_size
num_epochs = constants.region_num_epochs
if eval_on=='val':
num_samples = constants.num_val_regions
offset = constants.num_train_regions
elif eval_on=='test':
num_samples = constants.num_test_regions
offset = constants.num_train_regions + \
constants.num_val_regions
else:
print "eval_on can only be either 'val' or 'test'"
queue_size = constants.region_queue_size
im_h, im_w = constants.image_size
num_object_labels = constants.num_object_labels
num_attribute_labels = constants.num_attribute_labels
model_to_eval = constants.region_model_to_eval
class graph_creator():
def __init__(self, training=True):
self.tf_graph = tf.Graph()
with self.tf_graph.as_default():
self.create_placeholders()
self.word_vec_mgr = word_vector_manager()
self.obj_atr_inference = inference.ObjectAttributeInference(
self.plh['region_images'],
self.word_vec_mgr.object_label_vectors,
self.word_vec_mgr.attribute_label_vectors,
training)
# self.add_losses()
self.vars_to_save = tf.all_variables()
def create_placeholders(self):
self.plh = placeholder_management.PlaceholderManager()
self.plh.add_placeholder(
'region_images',
tf.float32,
shape=[None, im_h, im_w, 3])
self.plh.add_placeholder(
'object_labels',
tf.float32,
shape=[None, num_object_labels])
self.plh.add_placeholder(
'attribute_labels',
tf.float32,
shape=[None, num_attribute_labels])
def add_losses(self):
self.object_loss = losses.object_loss(
self.obj_atr_inference.object_scores,
self.plh['object_labels'])
self.attribute_loss = losses.attribute_loss(
self.obj_atr_inference.attribute_scores,
self.plh['attribute_labels'])
self.regularization_loss = self.regularization()
self.total_loss = self.object_loss + \
self.attribute_loss + \
self.regularization_loss
def regularization(self):
vars_to_regularize = tf.get_collection('to_regularize')
loss = losses.regularization_loss(
vars_to_regularize,
regularization_coeff)
return loss
def create_initializer(graph, sess):
class initializer():
def __init__(self):
with graph.tf_graph.as_default():
model_restorer = tf.train.Saver(graph.vars_to_save)
model_restorer.restore(sess, model_to_eval)
def initialize(self):
pass
return initializer()
def create_batch_generator():
data_mgr = cropped_regions.data(
constants.image_dir,
constants.object_labels_json,
constants.attribute_labels_json,
constants.regions_json,
constants.image_size,
channels=3,
mean_image_filename=None)
index_generator = tftools.data.random(
batch_size,
num_samples,
num_epochs,
offset)
batch_generator = tftools.data.async_batch_generator(
data_mgr,
index_generator,
queue_size)
return batch_generator
def create_feed_dict_creator(plh):
def feed_dict_creator(batch):
inputs = {
'region_images': batch['region_images'],
'object_labels': batch['object_labels'],
'attribute_labels': batch['attribute_labels']
}
return plh.get_feed_dict(inputs)
return feed_dict_creator
class eval_mgr():
def __init__(self):
self.correct_objects = 0
self.correct_attributes = 0
self.num_iter = 0
self.num_object_samples = 0
self.num_attribute_samples = 0
def eval(self,
iter,
eval_vars_dict,
labels):
self.eval_object_accuracy(
eval_vars_dict['object_prob'],
labels['objects'])
self.eval_attribute_accuracy(
eval_vars_dict['attribute_prob'],
labels['attributes'])
def eval_object_accuracy(
self,
prob,
labels):
matches = np.equal(
np.argmax(prob, 1),
np.argmax(labels, 1)).astype(np.int32)
self.correct_objects += np.sum(matches)
self.num_object_samples += matches.shape[0]
def eval_attribute_accuracy(
self,
prob,
labels):
matches = np.equal(
prob > 0.5,
labels == 1).astype(np.int32)
self.correct_attributes += np.sum(matches)
self.num_attribute_samples += (matches.shape[0]*matches.shape[1])
pdb.set_trace()
def get_object_accuracy(self):
return self.correct_objects/float(self.num_object_samples)
def get_attribute_accuracy(self):
return self.correct_attributes/float(self.num_attribute_samples)
def eval(
batch_generator,
sess,
initializer,
vars_to_eval_dict,
feed_dict_creator,
evaluator):
vars_to_eval_names = []
vars_to_eval = []
for var_name, var in vars_to_eval_dict.items():
vars_to_eval_names += [var_name]
vars_to_eval += [var]
with sess.as_default():
initializer.initialize()
iter = 0
for batch in batch_generator:
print iter
feed_dict = feed_dict_creator(batch)
eval_vars = sess.run(
vars_to_eval,
feed_dict = feed_dict)
eval_vars_dict = {
var_name: eval_var for var_name, eval_var in
zip(vars_to_eval_names, eval_vars)}
labels = dict()
labels['objects'] = batch['object_labels']
labels['attributes'] = batch['attribute_labels']
evaluator.eval(iter, eval_vars_dict, labels)
print 'Object accuracy: {}'.format(
evaluator.get_object_accuracy())
print 'Attribute accuracy: {}'.format(
evaluator.get_attribute_accuracy())
iter+=1
if __name__=='__main__':
print 'Creating batch generator...'
batch_generator = create_batch_generator()
print 'Creating computation graph...'
graph = graph_creator(False)
print 'Starting a session...'
sess = tf.Session(graph=graph.tf_graph)
print 'Creating initializer...'
initializer = create_initializer(graph, sess)
print 'Creating feed dict creator...'
feed_dict_creator = create_feed_dict_creator(graph.plh)
print 'Creating dict of vars to be evaluated...'
vars_to_eval_dict = {
'object_prob': graph.obj_atr_inference.object_prob,
'attribute_prob': graph.obj_atr_inference.attribute_prob,
}
print 'Creating evaluator...'
evaluator = eval_mgr()
print 'Start evaluating...'
eval(
batch_generator,
sess,
initializer,
vars_to_eval_dict,
feed_dict_creator,
evaluator)
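The two accuracies accumulated by eval_mgr above reduce to the following numpy computations (a sketch with hypothetical prob and label arrays; objects are scored by top-1 argmax match, attributes by per-label agreement at a 0.5 threshold):

import numpy as np

object_prob = np.random.rand(4, 1000)             # hypothetical batch of softmax outputs
object_labels = np.eye(1000)[np.random.randint(1000, size=4)]
object_acc = np.mean(np.argmax(object_prob, 1) == np.argmax(object_labels, 1))

attribute_prob = np.random.rand(4, 1000)          # hypothetical sigmoid outputs
attribute_labels = (np.random.rand(4, 1000) > 0.9).astype(np.float32)
attribute_acc = np.mean((attribute_prob > 0.5) == (attribute_labels == 1))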
import pdb
import os
import ujson
import data.cropped_regions as cropped_regions
import tftools.data
from tftools import var_collect, placeholder_management
from object_attribute_classifier import inference
from word2vec.word_vector_management import word_vector_manager
import losses
import constants
import tensorflow as tf
batch_size = constants.region_batch_size
num_samples = constants.region_num_samples
num_epochs = constants.region_num_epochs
offset = constants.region_offset
queue_size = constants.region_queue_size
im_h, im_w = constants.image_size
num_object_labels = constants.num_object_labels
num_attribute_labels = constants.num_attribute_labels
regularization_coeff = constants.region_regularization_coeff
lr = constants.region_fine_tune_lr
log_every_n_iter = constants.region_log_every_n_iter
output_dir = constants.region_output_dir
model = constants.region_model
fine_tune_from_iter = constants.region_fine_tune_from_iter
fine_tune_from = constants.region_fine_tune_from
#resnet_model = constants.resnet_ckpt
class graph_creator():
def __init__(self, training=True):
self.tf_graph = tf.Graph()
with self.tf_graph.as_default():
self.create_placeholders()
self.word_vec_mgr = word_vector_manager()
self.obj_atr_inference = inference.ObjectAttributeInference(
self.plh['region_images'],
self.word_vec_mgr.object_label_vectors,
self.word_vec_mgr.attribute_label_vectors,
training)
self.add_losses()
self.vars_to_save = tf.all_variables()
def create_placeholders(self):
self.plh = placeholder_management.PlaceholderManager()
self.plh.add_placeholder(
'region_images',
tf.float32,
shape=[None, im_h, im_w, 3])
self.plh.add_placeholder(
'object_labels',
tf.float32,
shape=[None, num_object_labels])
self.plh.add_placeholder(
'attribute_labels',
tf.float32,
shape=[None, num_attribute_labels])
def add_losses(self):
self.object_loss = losses.object_loss(
self.obj_atr_inference.object_scores,
self.plh['object_labels'])
self.attribute_loss = losses.attribute_loss(
self.obj_atr_inference.attribute_scores,
self.plh['attribute_labels'])
self.regularization_loss = self.regularization()
self.total_loss = self.object_loss + \
self.attribute_loss + \
self.regularization_loss
def regularization(self):
vars_to_regularize = tf.get_collection('to_regularize')
loss = losses.regularization_loss(
vars_to_regularize,
regularization_coeff)
return loss
def create_initializer(graph, sess):
class initializer():
def __init__(self):
with graph.tf_graph.as_default():
model_restorer = tf.train.Saver(graph.vars_to_save)
model_restorer.restore(sess, fine_tune_from)
all_vars = tf.all_variables()
other_vars = [var for var in all_vars
if var not in graph.vars_to_save]
var_collect.print_var_list(
other_vars,
'optimizer_vars')
self.init = tf.initialize_variables(other_vars)
def initialize(self):
sess.run(self.init)
return initializer()
def create_batch_generator():
data_mgr = cropped_regions.data(
constants.image_dir,
constants.object_labels_json,
constants.attribute_labels_json,
constants.regions_json,
constants.image_size,
channels=3,
mean_image_filename=None)
index_generator = tftools.data.random(
batch_size,
num_samples,
num_epochs,
offset)
batch_generator = tftools.data.async_batch_generator(
data_mgr,
index_generator,
queue_size)
return batch_generator
def create_feed_dict_creator(plh):
def feed_dict_creator(batch):
inputs = {
'region_images': batch['region_images'],
'object_labels': batch['object_labels'],
'attribute_labels': batch['attribute_labels']
}
return plh.get_feed_dict(inputs)
return feed_dict_creator
class attach_optimizer():
def __init__(self, graph):
with graph.tf_graph.as_default():
vars_to_train = tf.trainable_variables()
var_collect.print_var_list(
vars_to_train,
'vars_to_train')
self.ops = dict()
self.add_adam_optimizer(
graph.total_loss,
vars_to_train,
'all_trainable_vars')
def add_adam_optimizer(self, loss, var_list, name):
train_step = tf.train.AdamOptimizer(lr) \
.minimize(
loss,
var_list = var_list)
self.ops[name] = train_step
class log_mgr():
def __init__(
self,
vars_to_save,
sess,
log_every_n_iter,
output_dir,
model_path):
self.vars_to_save = vars_to_save
self.sess = sess
self.log_every_n_iter = log_every_n_iter
self.output_dir = output_dir
self.model_path = model_path
self.model_saver = tf.train.Saver(
var_list = vars_to_save,
max_to_keep = 0)
self.loss_values = dict()
def log(self, iter, is_last=False, eval_vars_dict=None):
if eval_vars_dict:
self.loss_values[str(iter)] = {
'total_loss': str(eval_vars_dict['total_loss']),
'object_loss': str(eval_vars_dict['object_loss']),
'attribute_loss': str(eval_vars_dict['attribute_loss'])}
if iter % self.log_every_n_iter==0 or is_last:
self.model_saver.save(
self.sess,
self.model_path,
global_step=iter)
loss_path = os.path.join(
self.output_dir,
'losses_' + str(iter) + '.json')
with open(loss_path, 'w') as outfile:
ujson.dump(
self.loss_values,
outfile,
sort_keys=True,
indent=4)
def train(
batch_generator,
sess,
initializer,
vars_to_eval_dict,
feed_dict_creator,
logger):
vars_to_eval_names = []
vars_to_eval = []
for var_name, var in vars_to_eval_dict.items():
vars_to_eval_names += [var_name]
vars_to_eval += [var]
with sess.as_default():
initializer.initialize()
iter = fine_tune_from_iter+1
for batch in batch_generator:
print iter
feed_dict = feed_dict_creator(batch)
eval_vars = sess.run(
vars_to_eval,
feed_dict = feed_dict)
eval_vars_dict = {
var_name: eval_var for var_name, eval_var in
zip(vars_to_eval_names, eval_vars)}
logger.log(iter, False, eval_vars_dict)
iter+=1
logger.log(iter-1, True, eval_vars_dict)
if __name__=='__main__':
print 'Creating batch generator...'
batch_generator = create_batch_generator()
print 'Creating computation graph...'
graph = graph_creator()
print 'Attaching optimizer...'
optimizer = attach_optimizer(graph)
print 'Starting a session...'
sess = tf.Session(graph=graph.tf_graph)
print 'Creating initializer...'
initializer = create_initializer(graph, sess)
print 'Creating feed dict creator...'
feed_dict_creator = create_feed_dict_creator(graph.plh)
print 'Creating dict of vars to be evaluated...'
vars_to_eval_dict = {
'object_prob': graph.obj_atr_inference.object_prob,
'attribute_prob': graph.obj_atr_inference.attribute_prob,
'total_loss': graph.total_loss,
'object_loss': graph.object_loss,
'attribute_loss': graph.attribute_loss,
'optimizer_op': optimizer.ops['all_trainable_vars']
}
print 'Creating logger...'
vars_to_save = graph.vars_to_save
logger = log_mgr(
vars_to_save,
sess,
log_every_n_iter,
output_dir,
model)
print 'Start training...'
train(
batch_generator,
sess,
initializer,
vars_to_eval_dict,
feed_dict_creator,
logger)
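For context, the fine-tuning setup above relies on the standard tf.train.Saver checkpoint naming; a brief sketch of the convention it assumes:

# saver.save(sess, '.../1/object_attribute_classifiers/model', global_step=18800)
# writes '.../1/object_attribute_classifiers/model-18800', which is the file
# constants.region_fine_tune_from restores; the loop in train() then resumes
# the iteration counter at region_fine_tune_from_iter + 1.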
@@ -3,26 +3,92 @@ import pdb
import resnet.inference as resnet_inference
from tftools import var_collect, placeholder_management, layers
import constants
from word2vec.word_vector_management import word_vector_manager
import losses
import tensorflow as tf
class ObjectAttributeInference():
def __init__(
self,
image_regions,
object_label_vectors,
attribute_label_vectors,
training):
self.image_regions = image_regions
self.training = training
self.avg_pool_feat = resnet_inference.inference(
self.image_regions,
self.training,
num_classes=None)
self.avg_pool_feat = layers.batch_norm(
self.avg_pool_feat,
tf.constant(self.training))
self.resnet_vars = self.get_resnet_vars()
self.object_embed = self.add_object_graph(self.avg_pool_feat)
self.attribute_embed = self.add_attribute_graph(self.avg_pool_feat)
self.object_label_embed = self.add_object_label_graph(object_label_vectors)
self.attribute_label_embed = self.add_attribute_label_graph(attribute_label_vectors)
with tf.variable_scope('object_score_graph'):
self.object_scores = self.compute_cosine_similarity(
self.object_embed,
self.object_label_embed)
self.object_scores_alpha = tf.get_variable(
'object_alpha',
shape=[self.object_scores.get_shape().as_list()[1]],
initializer=tf.constant_initializer())
self.object_scores_bias = tf.get_variable(
'object_beta',
shape=[self.object_scores.get_shape().as_list()[1]],
initializer=tf.constant_initializer())
self.object_scores = \
self.object_scores_alpha * self.object_scores + \
self.object_scores_bias
self.object_prob = tf.nn.softmax(
self.object_scores,
name = 'object_prob')
with tf.variable_scope('attribute_score_graph'):
self.attribute_scores = self.compute_cosine_similarity(
self.attribute_embed,
self.attribute_label_embed)
self.attribute_scores_alpha = tf.get_variable(
'attribute_alpha',
shape=[self.attribute_scores.get_shape().as_list()[1]],
initializer=tf.constant_initializer())
self.attribute_scores_bias = tf.get_variable(
'attribute_beta',
shape=[self.attribute_scores.get_shape().as_list()[1]],
initializer=tf.constant_initializer())
self.attribute_scores = \
self.attribute_scores_alpha * self.attribute_scores + \
self.attribute_scores_bias
self.attribute_prob = tf.sigmoid(
self.attribute_scores,
name = 'attribute_prob')
def get_resnet_vars(self):
vars_resnet = []
for s in xrange(5):
vars_resnet += var_collect.collect_scope('scale'+str(s+1))
return vars_resnet
def add_object_graph(self, input):
with tf.variable_scope('object_graph') as object_graph:
@@ -45,7 +111,8 @@ class ObjectAttributeInference():
fc2_out = layers.full(
fc1_out,
out_dim,
'fc',
func = None)
return fc2_out
@@ -70,25 +137,106 @@ class ObjectAttributeInference():
fc2_out = layers.full(
fc1_out,
out_dim,
'fc',
func = None)
return fc2_out
def add_object_label_graph(self, input):
with tf.variable_scope('object_label_graph'):
out_dim = self.object_embed.get_shape().as_list()[-1]
with tf.variable_scope('fc1') as fc1:
in_dim = input.get_shape().as_list()[-1]
fc1_out = layers.full(
input,
out_dim,
'fc',
func = None)
fc1_out = layers.batch_norm(
fc1_out,
tf.constant(self.training))
fc1_out = tf.nn.relu(fc1_out)
with tf.variable_scope('fc2') as fc2:
in_dim = fc1_out.get_shape().as_list()[-1]
fc2_out = layers.full(
fc1_out,
out_dim,
'fc',
func = None)
return fc2_out
def add_attribute_label_graph(self, input):
with tf.variable_scope('attribute_label_graph'):
out_dim = self.attribute_embed.get_shape().as_list()[-1]
with tf.variable_scope('fc1') as fc1:
in_dim = input.get_shape().as_list()[-1]
fc1_out = layers.full(
input,
out_dim,
'fc',
func = None)
fc1_out = layers.batch_norm(
fc1_out,
tf.constant(self.training))
fc1_out = tf.nn.relu(fc1_out)
with tf.variable_scope('fc2') as fc2:
in_dim = fc1_out.get_shape().as_list()[-1]
fc2_out = layers.full(
fc1_out,
out_dim,
'fc',
func = None)
return fc2_out
def compute_cosine_similarity(self, feat1, feat2):
feat1 = tf.nn.l2_normalize(feat1, 1)
feat2 = tf.nn.l2_normalize(feat2, 1)
return tf.matmul(feat1, tf.transpose(feat2), name='cosine_similarity')
def compute_dot_product(self, feat1, feat2):
return tf.matmul(feat1, tf.transpose(feat2), name='dot_product')
if __name__=='__main__':
im_h, im_w = constants.image_size
plh = placeholder_management.PlaceholderManager()
plh.add_placeholder(
name = 'image_regions',
dtype = tf.float32,
shape = [None, im_h, im_w, 3])
plh.add_placeholder(
name = 'object_labels',
dtype = tf.float32,
shape = [None, constants.num_object_labels])
plh.add_placeholder(
name = 'attribute_labels',
dtype = tf.float32,
shape = [None, constants.num_attribute_labels])
word_vec_mgr = word_vector_manager()
training = False
obj_atr_inference = ObjectAttributeInference(
plh['image_regions'],
word_vec_mgr.object_label_vectors,
word_vec_mgr.attribute_label_vectors,
training)
object_loss = losses.object_loss(
obj_atr_inference.object_scores,
plh['object_labels'])
attribute_loss = losses.attribute_loss(
obj_atr_inference.attribute_scores,
plh['attribute_labels'])
vars_to_regularize = tf.get_collection('to_regularize')
var_collect.print_var_list(vars_to_regularize, 'to_regularize')
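A numpy sketch of the scoring scheme implemented in the score graphs above; shapes are hypothetical, scores are cosine similarities between region embeddings and label embeddings, scaled and shifted by the learned per-label alpha and beta (both initialized to zero by tf.constant_initializer):

import numpy as np

def l2_normalize(x, axis):
    return x / np.linalg.norm(x, axis=axis, keepdims=True)

region_embed = np.random.randn(8, 300)       # hypothetical region embeddings
label_embed = np.random.randn(1000, 300)     # hypothetical label embeddings

cosine = np.dot(l2_normalize(region_embed, 1), l2_normalize(label_embed, 1).T)
alpha = np.zeros(1000)                        # learned per-label scale, starts at 0
beta = np.zeros(1000)                         # learned per-label bias, starts at 0
scores = alpha * cosine + beta                # broadcast over the batch dimension
# object_prob is a softmax over scores; attribute_prob is an elementwise sigmoid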
import pdb
import os
import ujson
import numpy as np
import data.cropped_regions as cropped_regions
import tftools.data
from tftools import var_collect, placeholder_management
from object_attribute_classifier import inference
from word2vec.word_vector_management import word_vector_manager
import losses
import constants
import tensorflow as tf
tb_log_dir = constants.tb_log_dir
batch_size = constants.region_batch_size
num_samples = constants.region_num_samples
num_epochs = constants.region_num_epochs
offset = constants.region_offset
queue_size = constants.region_queue_size
im_h, im_w = constants.image_size
num_object_labels = constants.num_object_labels
num_attribute_labels = constants.num_attribute_labels
regularization_coeff = constants.region_regularization_coeff
lr = constants.region_lr
log_every_n_iter = constants.region_log_every_n_iter
output_dir = constants.region_output_dir
model = constants.region_model
resnet_model = constants.resnet_ckpt
class graph_creator():
def __init__(self, training=True):
self.tf_graph = tf.Graph()
with self.tf_graph.as_default():
self.create_placeholders()
self.word_vec_mgr = word_vector_manager()
self.obj_atr_inference = inference.ObjectAttributeInference(
self.plh['region_images'],
self.word_vec_mgr.object_label_vectors,
self.word_vec_mgr.attribute_label_vectors,
training)
self.add_losses()
self.vars_to_save = tf.all_variables()
self.merged = tf.merge_all_summaries()
self.writer = tf.train.SummaryWriter(
tb_log_dir,
graph = self.tf_graph)
def create_placeholders(self):
self.plh = placeholder_management.PlaceholderManager()
self.plh.add_placeholder(
'region_images',
tf.float32,
shape=[None, im_h, im_w, 3])
self.plh.add_placeholder(
'object_labels',
tf.float32,
shape=[None, num_object_labels])
self.plh.add_placeholder(
'attribute_labels',
tf.float32,
shape=[None, num_attribute_labels])
def add_losses(self):
self.object_loss = losses.object_loss(
self.obj_atr_inference.object_scores,
self.plh['object_labels'])
self.attribute_loss = losses.attribute_loss(
self.obj_atr_inference.attribute_scores,
self.plh['attribute_labels'])
self.regularization_loss = self.regularization()
self.total_loss = self.object_loss + \
self.attribute_loss + \
self.regularization_loss
total_loss_summary = tf.scalar_summary(
"total_loss",
self.total_loss)
def regularization(self):
vars_to_regularize = tf.get_collection('to_regularize')
loss = losses.regularization_loss(
vars_to_regularize,
regularization_coeff)
return loss
def create_initializer(graph, sess):
class initializer():
def __init__(self):
with graph.tf_graph.as_default():
resnet_vars = graph.obj_atr_inference.resnet_vars
resnet_restorer = tf.train.Saver(resnet_vars)
resnet_restorer.restore(sess, resnet_model)
not_to_init = resnet_vars
all_vars = tf.all_variables()
other_vars = [var for var in all_vars
if var not in not_to_init]
var_collect.print_var_list(
other_vars,
'vars_to_init')
self.init = tf.initialize_variables(other_vars)
def initialize(self):
sess.run(self.init)
return initializer()
def create_batch_generator():
data_mgr = cropped_regions.data(
constants.image_dir,
constants.object_labels_json,
constants.attribute_labels_json,
constants.regions_json,
constants.image_size,
channels=3,
mean_image_filename=None)
index_generator = tftools.data.random(
batch_size,
num_samples,
num_epochs,
offset)
batch_generator = tftools.data.async_batch_generator(
data_mgr,
index_generator,
queue_size)
return batch_generator
def create_feed_dict_creator(plh):
def feed_dict_creator(batch):
inputs = {
'region_images': batch['region_images'],
'object_labels': batch['object_labels'],
'attribute_labels': batch['attribute_labels']
}
return plh.get_feed_dict(inputs)
return feed_dict_creator
class attach_optimizer():
def __init__(self, graph):
with graph.tf_graph.as_default():
resnet_vars = graph.obj_atr_inference.resnet_vars
all_trainable_vars = tf.trainable_variables()
not_to_train = resnet_vars + \
[graph.word_vec_mgr.word_vectors]
vars_to_train = [
var for var in all_trainable_vars
if var not in not_to_train]
var_collect.print_var_list(
vars_to_train,
'vars_to_train')
self.ops = dict()
self.add_adam_optimizer(
graph.total_loss,
vars_to_train,
'all_but_resnet')
def add_adam_optimizer(self, loss, var_list, name):
train_step = tf.train.AdamOptimizer(lr) \
.minimize(
loss,
var_list = var_list)
self.ops[name] = train_step
class log_mgr():
def __init__(
self,
graph,
vars_to_save,
sess,
log_every_n_iter,
output_dir,
model_path):
self.graph = graph
self.vars_to_save = vars_to_save
self.sess = sess
self.log_every_n_iter = log_every_n_iter
self.output_dir = output_dir
self.model_path = model_path
self.model_saver = tf.train.Saver(
var_list = vars_to_save,
max_to_keep = 0)
self.loss_values = dict()
def log(self, iter, is_last=False, eval_vars_dict=None):
if eval_vars_dict:
self.graph.writer.add_summary(
eval_vars_dict['merged'],
iter)
print 'object'
print np.max(eval_vars_dict['object_prob'][0,:])
print np.min(eval_vars_dict['object_prob'][0,:])
print np.max(eval_vars_dict['object_scores'][0,:])
print np.min(eval_vars_dict['object_scores'][0,:])
print 'attribute'
print np.max(eval_vars_dict['attribute_prob'][0,:])
print np.min(eval_vars_dict['attribute_prob'][0,:])
print np.max(eval_vars_dict['attribute_scores'][0,:])
print np.min(eval_vars_dict['attribute_scores'][0,:])
self.loss_values[str(iter)] = {
'total_loss': str(eval_vars_dict['total_loss']),
'object_loss': str(eval_vars_dict['object_loss']),
'attribute_loss': str(eval_vars_dict['attribute_loss'])}
if iter % self.log_every_n_iter==0 or is_last:
self.model_saver.save(
self.sess,
self.model_path,
global_step=iter)
loss_path = os.path.join(
self.output_dir,
'losses_' + str(iter) + '.json')
with open(loss_path, 'w') as outfile:
ujson.dump(
self.loss_values,
outfile,
sort_keys=True,
indent=4)
def train(
batch_generator,
sess,
initializer,
vars_to_eval_dict,
feed_dict_creator,
logger):
vars_to_eval_names = []
vars_to_eval = []
for var_name, var in vars_to_eval_dict.items():
vars_to_eval_names += [var_name]
vars_to_eval += [var]
with sess.as_default():
initializer.initialize()
iter = 0
for batch in batch_generator:
print iter
feed_dict = feed_dict_creator(batch)
eval_vars = sess.run(
vars_to_eval,
feed_dict = feed_dict)
eval_vars_dict = {
var_name: eval_var for var_name, eval_var in
zip(vars_to_eval_names, eval_vars)}
logger.log(iter, False, eval_vars_dict)
iter+=1
logger.log(iter-1, True, eval_vars_dict)
if __name__=='__main__':
print 'Creating batch generator...'
batch_generator = create_batch_generator()
print 'Creating computation graph...'
graph = graph_creator()
print 'Attaching optimizer...'
optimizer = attach_optimizer(graph)
print 'Starting a session...'
sess = tf.Session(graph=graph.tf_graph)
print 'Creating initializer...'
initializer = create_initializer(graph, sess)
print 'Creating feed dict creator...'
feed_dict_creator = create_feed_dict_creator(graph.plh)
print 'Creating dict of vars to be evaluated...'
vars_to_eval_dict = {
'object_prob': graph.obj_atr_inference.object_prob,
'object_scores': graph.obj_atr_inference.object_scores,
'attribute_prob': graph.obj_atr_inference.attribute_prob,
'attribute_scores': graph.obj_atr_inference.attribute_scores,
'attribute_embed': graph.obj_atr_inference.attribute_embed,
'avg_pool_feat': graph.obj_atr_inference.avg_pool_feat,
'total_loss': graph.total_loss,
'object_loss': graph.object_loss,
'attribute_loss': graph.attribute_loss,
'optimizer_op': optimizer.ops['all_but_resnet'],
'merged': graph.merged,
}
print 'Creating logger...'
vars_to_save = graph.vars_to_save
logger = log_mgr(
graph,
vars_to_save,
sess,
log_every_n_iter,
output_dir,
model)
print 'Start training...'
train(
batch_generator,
sess,
initializer,
vars_to_eval_dict,
feed_dict_creator,
logger)
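log_mgr above serializes the accumulated per-iteration losses to 'losses_<iter>.json' with ujson; a sketch of the resulting structure (values are hypothetical, keys are iteration numbers stored as strings):

# losses_400.json (illustrative contents)
# {
#     "0": {"attribute_loss": "0.69", "object_loss": "6.9", "total_loss": "7.6"},
#     "1": {"attribute_loss": "0.68", "object_loss": "6.8", "total_loss": "7.5"},
#     ...
# }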
@@ -78,10 +78,10 @@ def inference(x, is_training,
# post-net
x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool")
if num_classes != None:
with tf.variable_scope('fc'):
x = fc(x, c)
return x
...
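With the lines above uncommented, the resnet inference function returns only the average-pooled features when num_classes is None (as the object-attribute classifier now calls it) and appends the fc head otherwise; a sketch of the two call patterns, with the second output name purely illustrative:

# avg_pool_feat = resnet_inference.inference(images, is_training, num_classes=None)  # features only
# head_output   = resnet_inference.inference(images, is_training, num_classes=1000)  # features + fc head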
@@ -27,7 +27,7 @@ if __name__=='__main__':
'Resnet/tensorflow-resnet-pretrained-20160509'
ckpt_filename = os.path.join(model_dir, 'ResNet-L50.ckpt')
img = image_io.imread("/home/tanmay/Code/GenVQA/GenVQA/resnet/schooner.jpg")
img = image_io.imresize(img, output_size=(224,224))
img = img.astype(np.float32)
...
@@ -2,7 +2,7 @@ import tensorflow as tf
def print_var_list(var_list, name='Variables'):
print name + ': \n' + '[' + ',\n '.join([var.name for var in var_list]) + ']'
def collect_name(var_name, graph=None):
...
from gensim.models import word2vec
import numpy as np
import ujson
import pdb
import constants
@@ -16,7 +16,7 @@ def get_vocab_word_vectors(
vocab_word_vectors = 2*np.random.rand(
vocab_size,
constants.word_vector_size)
vocab_word_vectors -= 1.0
found_word_vec = 0
for word, index in vocab.items():
@@ -25,7 +25,7 @@ def get_vocab_word_vectors(
vocab_word_vectors[index,:] = model[word]
np.save(
constants.pretrained_vocab_word_vectors_npy,
vocab_word_vectors)
print 'Found word vectors for {} out of {} words'.format(
@@ -39,7 +39,9 @@ if __name__=='__main__':
binary=True)
with open(constants.vocab_json, 'r') as file:
vocab = ujson.load(file)
get_vocab_word_vectors(model, vocab)
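The adjusted initialization above makes the fallback word vectors zero-centered; a small numpy sketch (vocab_size is hypothetical, word_vector_size matches the constant):

import numpy as np

vocab_size, word_vector_size = 5000, 300      # vocab_size is hypothetical
vocab_word_vectors = 2 * np.random.rand(vocab_size, word_vector_size)
vocab_word_vectors -= 1.0                     # now uniform over [-1.0, 1.0)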
import numpy as np
import pdb
import json
from tftools import var_collect, placeholder_management, layers
import constants
import tensorflow as tf
class word_vector_manager():
def __init__(self):
self.vocab_word_vectors = np.load(
constants.pretrained_vocab_word_vectors_npy)
self.vocab_word_vectors = self.vocab_word_vectors.astype(np.float32)
self.vocab_size = self.vocab_word_vectors.shape[0]
self.read_object_labels()
self.read_attribute_labels()
self.read_vocab()
with tf.variable_scope('word_vectors') as word_vectors:
self.init_word_vector_tensor()
self.normalized_word_vectors = tf.nn.l2_normalize(
self.word_vectors, 1)
with tf.variable_scope('object_label_word_vectors'):
self.object_label_word_vectors()
with tf.variable_scope('attribute_label_word_vectors'):
self.attribute_label_word_vectors()
def init_word_vector_tensor(self):
self.word_vectors = tf.get_variable(
name = 'word_vector',
shape = [self.vocab_size, constants.word_vector_size],
initializer = tf.constant_initializer(self.vocab_word_vectors))
tf.add_to_collection('to_regularize', self.word_vectors)
def read_object_labels(self):
with open(constants.object_labels_json, 'r') as file:
self.object_labels = json.load(file)
def read_attribute_labels(self):
with open(constants.attribute_labels_json, 'r') as file:
self.attribute_labels = json.load(file)
def read_vocab(self):
with open(constants.vocab_json, 'r') as file:
self.vocab = json.load(file)
def create_phrase_word_vectors(self, phrase, scope_name):
with tf.variable_scope(scope_name) as phrase_graph:
words = phrase.split(" ")
ids = []
for word in words:
if word in self.vocab:
ids += [self.vocab[word]]
else:
ids += [self.vocab[constants.unknown_token]]
phrase_word_vector = tf.nn.embedding_lookup(
self.normalized_word_vectors,
tf.constant(ids, dtype=tf.int64),
name = 'embedding_lookup')
phrase_word_vector = tf.reduce_mean(
phrase_word_vector,
0,
True,
'reduce_mean')
return phrase_word_vector
def object_label_word_vectors(self):
inv_object_labels = {v: k for k, v in self.object_labels.items()}
num_object_labels = len(inv_object_labels)
object_label_vector_list = [None]*num_object_labels
for i in xrange(num_object_labels):
object_label_vector_list[i] = self.create_phrase_word_vectors(
inv_object_labels[i],
'object_label_' + str(i))
self.object_label_vectors = tf.concat(
0, object_label_vector_list)
def attribute_label_word_vectors(self):
inv_attribute_labels = {v: k for k, v in self.attribute_labels.items()}
num_attribute_labels = len(inv_attribute_labels)
attribute_label_vector_list = [None]*num_attribute_labels
for i in xrange(num_attribute_labels):
attribute_label_vector_list[i] = self.create_phrase_word_vectors(
inv_attribute_labels[i],
'attribute_label_' + str(i))
self.attribute_label_vectors = tf.concat(
0, attribute_label_vector_list)
if __name__=='__main__':
word_vector_mgr = word_vector_manager()
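A numpy sketch of what create_phrase_word_vectors above computes for a multi-word label; the vocabulary and unknown token are hypothetical, words missing from the vocabulary fall back to the unknown-token vector, and the L2-normalized word vectors of the phrase are averaged:

import numpy as np

vocab = {'traffic': 0, 'light': 1, 'UNK': 2}           # hypothetical vocabulary
word_vectors = np.random.randn(3, 300).astype(np.float32)
normalized = word_vectors / np.linalg.norm(word_vectors, axis=1, keepdims=True)

phrase = 'traffic light'
ids = [vocab.get(word, vocab['UNK']) for word in phrase.split(' ')]
phrase_vector = np.mean(normalized[ids], axis=0, keepdims=True)   # shape (1, 300)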