From 4d7f6f092e36ee8236a29cd944f16e1f4b5fc580 Mon Sep 17 00:00:00 2001 From: tgupta6 <tgupta6@illinois.edu> Date: Tue, 14 Jun 2016 10:59:38 -0500 Subject: [PATCH] object attribute classifier training, fine-tune, test setup --- constants.py | 70 ++++- data/cropped_regions.py | 67 ++--- losses.py | 52 ++++ object_attribute_classifier/eval.py | 263 ++++++++++++++++++ object_attribute_classifier/fine_tune.py | 290 ++++++++++++++++++++ object_attribute_classifier/inference.py | 174 +++++++++++- object_attribute_classifier/train.py | 323 +++++++++++++++++++++++ resnet/inference.py | 8 +- test_resnet_inference.py | 2 +- tftools/var_collect.py | 2 +- word2vec/get_vocab_word_vectors.py | 10 +- word2vec/word_vector_management.py | 107 ++++++++ 12 files changed, 1303 insertions(+), 65 deletions(-) create mode 100644 losses.py create mode 100644 object_attribute_classifier/eval.py create mode 100644 object_attribute_classifier/fine_tune.py create mode 100644 object_attribute_classifier/train.py create mode 100644 word2vec/word_vector_management.py diff --git a/constants.py b/constants.py index 8aac6ea..d5e4195 100644 --- a/constants.py +++ b/constants.py @@ -1,4 +1,25 @@ import os +import pdb + +def mkdir_if_not_exists(dir_name): + if not os.path.exists(dir_name): + os.mkdir(dir_name) + +experiment_name = '2' +# Global output directory (all subexperiments will be saved here) +global_output_dir = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome' + +global_experiment_dir = os.path.join( + global_output_dir, + experiment_name) + +tb_log_dir = os.path.join( + global_experiment_dir, + 'tensorboard_logdir') + +mkdir_if_not_exists(global_output_dir) +mkdir_if_not_exists(global_experiment_dir) +mkdir_if_not_exists(tb_log_dir) #height and width to which images are resized before feeding into networks image_size = (224, 224) @@ -27,17 +48,19 @@ mean_image_filename = os.path.join( data_absolute_path, 'restructured/mean_image.jpg') -# Vocabulary vocab_json = os.path.join( data_absolute_path, 'restructured/vocab_subset.json') +num_object_labels = 1000 +num_attribute_labels = 1000 + # Regions data partition -# First 70% meant to be used for training +# First 80% meant to be used for training # Next 10% is set aside for validation -# Last 20% is to be used for testing +# Last 10% is to be used for testing num_total_regions = 1951768 -num_train_regions = 1366238 # First 70% +num_train_regions = 1561416 # First 80% num_val_regions = 195176 # Next 10% num_test_regions = num_total_regions \ - num_train_regions \ @@ -55,12 +78,37 @@ word2vec_binary = '/home/tanmay/Code/word2vec/word2vec-api-master/' + \ word_vector_size = 300 # Numpy matrix storing vocabulary word vectors -vocab_word_vectors_npy = os.path.join( +pretrained_vocab_word_vectors_npy = os.path.join( data_absolute_path, - 'restructured/vocab_word_vectors.npy') - - - - - + 'restructured/pretrained_vocab_word_vectors.npy') + +# Object Attribute Classifier Training Params +region_batch_size = 100 +region_num_samples = num_total_regions +region_num_epochs = 10 +region_offset = 0 +region_queue_size = 400 +region_regularization_coeff = 1e-4 +region_lr = 1e-2 +region_log_every_n_iter = 400 +region_output_dir = os.path.join( + global_experiment_dir, + 'object_attribute_classifiers') + +mkdir_if_not_exists(region_output_dir) + +region_model = os.path.join( + region_output_dir, + 'model') + +region_fine_tune_from_iter = 18800 +region_fine_tune_from = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome/' + \ + '1/object_attribute_classifiers/model-' + \ + 
str(region_fine_tune_from_iter) +region_fine_tune_lr = 1e-2 + +# Object Attribute Classifier Evaluation Params +region_eval_on = 'val' # One of {'val','test'} +region_model_to_eval = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome/' + \ + '2/object_attribute_classifiers/model-22000' diff --git a/data/cropped_regions.py b/data/cropped_regions.py index 174b22b..5b0b38e 100644 --- a/data/cropped_regions.py +++ b/data/cropped_regions.py @@ -1,5 +1,6 @@ import numpy as np -import json +#import json +import ujson import os import pdb import time @@ -54,13 +55,13 @@ class data(): def read_json_file(self, filename): print 'Reading {} ...'.format(filename) with open(filename, 'r') as file: - return json.load(file) + return ujson.load(file) def get(self, samples): batch_size = len(samples) batch = dict() batch['region_ids'] = dict() - batch['images'] = np.zeros( + batch['region_images'] = np.zeros( [batch_size, self.h, self.w, self.c], np.float32) batch['object_labels'] = np.zeros( [batch_size, len(self.object_labels_dict)], np.float32) @@ -68,7 +69,7 @@ class data(): [batch_size, len(self.attribute_labels_dict)], np.float32) for index, sample in enumerate(samples): batch['region_ids'][index] = self.sample_to_region_dict[sample] - batch['images'][index, :, :, :], read_success = \ + batch['region_images'][index, :, :, :], read_success = \ self.get_region_image(sample) if read_success: batch['object_labels'][index, :] = self.get_object_label(sample) @@ -79,19 +80,22 @@ class data(): def get_single(self, sample, batch_list, worker_id): try: batch = dict() - batch['region_ids'] = dict() - batch['images'] = np.zeros( - [self.h, self.w, self.c], np.float32) - batch['object_labels'] = np.zeros( - [len(self.object_labels_dict)], np.float32) - batch['attribute_labels'] = np.zeros( - [len(self.attribute_labels_dict)], np.float32) - - batch['region_ids'] = self.sample_to_region_dict[sample] - batch['images'], read_success = self.get_region_image(sample) + batch['region_id'] = self.sample_to_region_dict[sample] + batch['region_image'], read_success = self.get_region_image(sample) if read_success: - batch['object_labels'] = self.get_object_label(sample) - batch['attribute_labels'] = self.get_attribute_label(sample) + batch['object_label'], batch['object_label_words'] = \ + self.get_object_label(sample) + batch['attribute_label'], batch['attribute_label_words'] = \ + self.get_attribute_label(sample) + else: + batch['region_image'] = np.zeros( + [self.h, self.w, self.c], np.float32) + batch['object_label'] = np.zeros( + [len(self.object_labels_dict)], np.float32) + batch['attribute_label'] = np.zeros( + [len(self.attribute_labels_dict)], np.float32) + batch['object_label_words'] = [] + batch['attribute_label_words'] = [] batch_list[worker_id] = batch @@ -111,24 +115,24 @@ class data(): worker.start() workers.append(worker) - for worker in jobs: + for worker in workers: worker.join() batch_size = len(samples) batch = dict() batch['region_ids'] = dict() - batch['images'] = np.zeros( + batch['region_images'] = np.zeros( [batch_size, self.h, self.w, self.c], np.float32) - batch['object_labels'] = np.zeros( + batch['object_labels']= np.zeros( [batch_size, len(self.object_labels_dict)], np.float32) batch['attribute_labels'] = np.zeros( [batch_size, len(self.attribute_labels_dict)], np.float32) for index, single_batch in enumerate(batch_list): - batch['region_ids'][index] = single_batch['region_ids'] - batch['images'][index, :, :, :] = single_batch['images'] - batch['object_labels'][index, :] = 
single_batch['object_labels'] - batch['attribute_labels'][index,:] = single_batch['attribute_labels'] + batch['region_ids'][index] = single_batch['region_id'] + batch['region_images'][index, :, :, :] = single_batch['region_image'] + batch['object_labels'][index, :] = single_batch['object_label'] + batch['attribute_labels'][index,:] = single_batch['attribute_label'] return batch @@ -142,13 +146,13 @@ class data(): read_success = True try: region_image = image_io.imread(filename) + region_image = region_image.astype(np.float32) except: + print 'Could not read image: Setting the image pixels to 0s' read_success = False - region_image = np.zeros([self.h, self.w], dtype) - - region_image = region_image.astype(np.float32) + region_image = np.zeros([self.h, self.w, 3], dtype=np.float32) - return region_image / 255 - self.mean_image, read_success + return region_image, read_success def single_to_three_channel(self, image): if len(image.shape)==3: @@ -171,7 +175,7 @@ class data(): def get_mean_image(self, mean_image_filename): if mean_image_filename: return image_io.imread(mean_image_filename).astype( - np.float32) / 255 + np.float32) else: return np.zeros([self.h, self.w, self.c], np.float32) @@ -195,7 +199,7 @@ class data(): label_id = self.object_labels_dict[_unknown_token] object_label_encoding[0,label_id] = 1.0 - return object_label_encoding/np.sum(object_label_encoding) + return object_label_encoding/np.sum(object_label_encoding), object_labels def get_attribute_label(self, sample): # Attribute is turned on if it is present @@ -209,7 +213,7 @@ class data(): label_id = self.attribute_labels_dict[attribute] attribute_label_encoding[0,label_id] = 1.0 - return attribute_label_encoding + return attribute_label_encoding, attribute_labels if __name__=='__main__': data_mgr = data(constants.image_dir, @@ -250,6 +254,7 @@ if __name__=='__main__': num_samples = 200 num_epochs = 1 offset = 0 + queue_size = 100 index_generator = tftools.data.random( batch_size, @@ -260,7 +265,7 @@ if __name__=='__main__': batch_generator = tftools.data.async_batch_generator( data_mgr, index_generator, - 100) + queue_size) count = 0 for batch in batch_generator: diff --git a/losses.py b/losses.py new file mode 100644 index 0000000..0529691 --- /dev/null +++ b/losses.py @@ -0,0 +1,52 @@ +import tensorflow as tf + + +def object_loss(scores, labels): + with tf.variable_scope('object_loss'): + loss_vector = tf.nn.softmax_cross_entropy_with_logits( + scores, + labels, + name='softmax_cross_entropy_with_logits') + + loss = tf.reduce_mean( + loss_vector, + name='average_loss') + + return loss + + +def attribute_loss(scores, labels): + with tf.variable_scope('attribute_loss'): + loss_matrix = tf.nn.sigmoid_cross_entropy_with_logits( + scores, + labels, + name='sigmoid_cross_entropy_with_logits') + + # label_count = tf.reduce_mean( + # labels, + # 0, + # keep_dims=True, + # name='label_count') + + # label_count = tf.truediv( + # label_count, + # tf.to_float(label_count.get_shape().as_list()[0]), + # name='normalized_label_count') + + loss = tf.reduce_mean( + loss_matrix, +# tf.matmul(loss_matrix, tf.transpose(label_count)), + name='average_loss') + + return loss + + +def regularization_loss(param_list, coeff): + regularizer = tf.zeros(shape=[]) + for param in param_list: + regularizer += tf.nn.l2_loss(param) + return coeff*regularizer + + + + diff --git a/object_attribute_classifier/eval.py b/object_attribute_classifier/eval.py new file mode 100644 index 0000000..9ae30c2 --- /dev/null +++ b/object_attribute_classifier/eval.py @@ 
-0,0 +1,263 @@ +import pdb +import os +import ujson +import numpy as np + +import data.cropped_regions as cropped_regions + +import tftools.data +from tftools import var_collect, placeholder_management +from object_attribute_classifier import inference +from word2vec.word_vector_management import word_vector_manager + +import losses +import constants + +import tensorflow as tf + +eval_on = constants.region_eval_on + +batch_size = constants.region_batch_size +num_epochs = constants.region_num_epochs + +if eval_on=='val': + num_samples = constants.num_val_regions + offset = constants.num_train_regions +elif eval_on=='test': + num_samples = constants.num_test_regions + offset = constants.num_train_regions + \ + constants.num_val_regions +else: + print "eval_on can only be either 'val' or 'test'" + +queue_size = constants.region_queue_size + +im_h, im_w = constants.image_size +num_object_labels = constants.num_object_labels +num_attribute_labels = constants.num_attribute_labels + +model_to_eval = constants.region_model_to_eval + + +class graph_creator(): + def __init__(self, training=True): + self.tf_graph = tf.Graph() + with self.tf_graph.as_default(): + self.create_placeholders() + self.word_vec_mgr = word_vector_manager() + self.obj_atr_inference = inference.ObjectAttributeInference( + self.plh['region_images'], + self.word_vec_mgr.object_label_vectors, + self.word_vec_mgr.attribute_label_vectors, + training) +# self.add_losses() + self.vars_to_save = tf.all_variables() + + def create_placeholders(self): + self.plh = placeholder_management.PlaceholderManager() + + self.plh.add_placeholder( + 'region_images', + tf.float32, + shape=[None, im_h, im_w, 3]) + + self.plh.add_placeholder( + 'object_labels', + tf.float32, + shape=[None, num_object_labels]) + + self.plh.add_placeholder( + 'attribute_labels', + tf.float32, + shape=[None, num_attribute_labels]) + + def add_losses(self): + self.object_loss = losses.object_loss( + self.obj_atr_inference.object_scores, + self.plh['object_labels']) + + self.attribute_loss = losses.attribute_loss( + self.obj_atr_inference.attribute_scores, + self.plh['attribute_labels']) + + self.regularization_loss = self.regularization() + + self.total_loss = self.object_loss + \ + self.attribute_loss + \ + self.regularization_loss + + def regularization(self): + vars_to_regularize = tf.get_collection('to_regularize') + loss = losses.regularization_loss( + vars_to_regularize, + regularization_coeff) + return loss + + +def create_initializer(graph, sess): + class initializer(): + def __init__(self): + with graph.tf_graph.as_default(): + model_restorer = tf.train.Saver(graph.vars_to_save) + model_restorer.restore(sess, model_to_eval) + + def initialize(self): + pass + + return initializer() + + +def create_batch_generator(): + data_mgr = cropped_regions.data( + constants.image_dir, + constants.object_labels_json, + constants.attribute_labels_json, + constants.regions_json, + constants.image_size, + channels=3, + mean_image_filename=None) + + index_generator = tftools.data.random( + batch_size, + num_samples, + num_epochs, + offset) + + batch_generator = tftools.data.async_batch_generator( + data_mgr, + index_generator, + queue_size) + + return batch_generator + + +def create_feed_dict_creator(plh): + def feed_dict_creator(batch): + inputs = { + 'region_images': batch['region_images'], + 'object_labels': batch['object_labels'], + 'attribute_labels': batch['attribute_labels'] + } + return plh.get_feed_dict(inputs) + + return feed_dict_creator + + +class eval_mgr(): + def 
__init__(self): + self.correct_objects = 0 + self.correct_attributes = 0 + self.num_iter = 0 + self.num_object_samples = 0 + self.num_attribute_samples = 0 + + def eval(self, + iter, + eval_vars_dict, + labels): + + self.eval_object_accuracy( + eval_vars_dict['object_prob'], + labels['objects']) + + self.eval_attribute_accuracy( + eval_vars_dict['attribute_prob'], + labels['attributes']) + + def eval_object_accuracy( + self, + prob, + labels): + + matches = np.equal( + np.argmax(prob, 1), + np.argmax(labels, 1)).astype(np.int32) + + self.correct_objects += np.sum(matches) + self.num_object_samples += matches.shape[0] + + def eval_attribute_accuracy( + self, + prob, + labels): + + matches = np.equal( + prob > 0.5, + labels == 1).astype(np.int32) + + self.correct_attributes += np.sum(matches) + self.num_attribute_samples += (matches.shape[0]*matches.shape[1]) + pdb.set_trace() + + def get_object_accuracy(self): + return self.correct_objects/float(self.num_object_samples) + + def get_attribute_accuracy(self): + return self.correct_attributes/float(self.num_attribute_samples) + + +def eval( + batch_generator, + sess, + initializer, + vars_to_eval_dict, + feed_dict_creator, + evaluator): + + vars_to_eval_names = [] + vars_to_eval = [] + for var_name, var in vars_to_eval_dict.items(): + vars_to_eval_names += [var_name] + vars_to_eval += [var] + + with sess.as_default(): + initializer.initialize() + + iter = 0 + for batch in batch_generator: + print iter + feed_dict = feed_dict_creator(batch) + eval_vars = sess.run( + vars_to_eval, + feed_dict = feed_dict) + eval_vars_dict = { + var_name: eval_var for var_name, eval_var in + zip(vars_to_eval_names, eval_vars)} + labels = dict() + labels['objects'] = batch['object_labels'] + labels['attributes'] = batch['attribute_labels'] + evaluator.eval(iter, eval_vars_dict, labels) + print 'Object accuracy: {}'.format( + evaluator.get_object_accuracy()) + print 'Attribute accuracy: {}'.format( + evaluator.get_attribute_accuracy()) + iter+=1 + + +if __name__=='__main__': + print 'Creating batch generator...' + batch_generator = create_batch_generator() + print 'Creating computation graph...' + graph = graph_creator(False) + print 'Starting a session...' + sess = tf.Session(graph=graph.tf_graph) + print 'Creating initializer...' + initializer = create_initializer(graph, sess) + print 'Creating feed dict creator...' + feed_dict_creator = create_feed_dict_creator(graph.plh) + print 'Creating dict of vars to be evaluated...' + vars_to_eval_dict = { + 'object_prob': graph.obj_atr_inference.object_prob, + 'attribute_prob': graph.obj_atr_inference.attribute_prob, + } + print 'Creating evaluator...' + evaluator = eval_mgr() + print 'Start evaluating...' 
+ eval( + batch_generator, + sess, + initializer, + vars_to_eval_dict, + feed_dict_creator, + evaluator) + diff --git a/object_attribute_classifier/fine_tune.py b/object_attribute_classifier/fine_tune.py new file mode 100644 index 0000000..d570e13 --- /dev/null +++ b/object_attribute_classifier/fine_tune.py @@ -0,0 +1,290 @@ +import pdb +import os +import ujson + +import data.cropped_regions as cropped_regions + +import tftools.data +from tftools import var_collect, placeholder_management +from object_attribute_classifier import inference +from word2vec.word_vector_management import word_vector_manager + +import losses +import constants + +import tensorflow as tf + + +batch_size = constants.region_batch_size +num_samples = constants.region_num_samples +num_epochs = constants.region_num_epochs +offset = constants.region_offset +queue_size = constants.region_queue_size + +im_h, im_w = constants.image_size +num_object_labels = constants.num_object_labels +num_attribute_labels = constants.num_attribute_labels + +regularization_coeff = constants.region_regularization_coeff +lr = constants.region_fine_tune_lr +log_every_n_iter = constants.region_log_every_n_iter + +output_dir = constants.region_output_dir + +model = constants.region_model +fine_tune_from_iter = constants.region_fine_tune_from_iter +fine_tune_from = constants.region_fine_tune_from + +#resnet_model = constants.resnet_ckpt + +class graph_creator(): + def __init__(self, training=True): + self.tf_graph = tf.Graph() + with self.tf_graph.as_default(): + self.create_placeholders() + self.word_vec_mgr = word_vector_manager() + self.obj_atr_inference = inference.ObjectAttributeInference( + self.plh['region_images'], + self.word_vec_mgr.object_label_vectors, + self.word_vec_mgr.attribute_label_vectors, + training) + self.add_losses() + self.vars_to_save = tf.all_variables() + + def create_placeholders(self): + self.plh = placeholder_management.PlaceholderManager() + + self.plh.add_placeholder( + 'region_images', + tf.float32, + shape=[None, im_h, im_w, 3]) + + self.plh.add_placeholder( + 'object_labels', + tf.float32, + shape=[None, num_object_labels]) + + self.plh.add_placeholder( + 'attribute_labels', + tf.float32, + shape=[None, num_attribute_labels]) + + def add_losses(self): + self.object_loss = losses.object_loss( + self.obj_atr_inference.object_scores, + self.plh['object_labels']) + + self.attribute_loss = losses.attribute_loss( + self.obj_atr_inference.attribute_scores, + self.plh['attribute_labels']) + + self.regularization_loss = self.regularization() + + self.total_loss = self.object_loss + \ + self.attribute_loss + \ + self.regularization_loss + + def regularization(self): + vars_to_regularize = tf.get_collection('to_regularize') + loss = losses.regularization_loss( + vars_to_regularize, + regularization_coeff) + return loss + + +def create_initializer(graph, sess): + class initializer(): + def __init__(self): + with graph.tf_graph.as_default(): + model_restorer = tf.train.Saver(graph.vars_to_save) + model_restorer.restore(sess, fine_tune_from) + + all_vars = tf.all_variables() + other_vars = [var for var in all_vars + if var not in graph.vars_to_save] + var_collect.print_var_list( + other_vars, + 'optimizer_vars') + self.init = tf.initialize_variables(other_vars) + + def initialize(self): + sess.run(self.init) + + return initializer() + + +def create_batch_generator(): + data_mgr = cropped_regions.data( + constants.image_dir, + constants.object_labels_json, + constants.attribute_labels_json, + constants.regions_json, + 
constants.image_size, + channels=3, + mean_image_filename=None) + + index_generator = tftools.data.random( + batch_size, + num_samples, + num_epochs, + offset) + + batch_generator = tftools.data.async_batch_generator( + data_mgr, + index_generator, + queue_size) + + return batch_generator + + +def create_feed_dict_creator(plh): + def feed_dict_creator(batch): + inputs = { + 'region_images': batch['region_images'], + 'object_labels': batch['object_labels'], + 'attribute_labels': batch['attribute_labels'] + } + return plh.get_feed_dict(inputs) + + return feed_dict_creator + + +class attach_optimizer(): + def __init__(self, graph): + with graph.tf_graph.as_default(): + vars_to_train = tf.trainable_variables() + var_collect.print_var_list( + vars_to_train, + 'vars_to_train') + self.ops = dict() + self.add_adam_optimizer( + graph.total_loss, + vars_to_train, + 'all_trainable_vars') + + def add_adam_optimizer(self, loss, var_list, name): + train_step = tf.train.AdamOptimizer(lr) \ + .minimize( + loss, + var_list = var_list) + self.ops[name] = train_step + + +class log_mgr(): + def __init__( + self, + vars_to_save, + sess, + log_every_n_iter, + output_dir, + model_path): + + self.vars_to_save = vars_to_save + self.sess = sess + self.log_every_n_iter = log_every_n_iter + self.output_dir = output_dir + self.model_path = model_path + + self.model_saver = tf.train.Saver( + var_list = vars_to_save, + max_to_keep = 0) + + self.loss_values = dict() + + def log(self, iter, is_last=False, eval_vars_dict=None): + if eval_vars_dict: + self.loss_values[str(iter)] = { + 'total_loss': str(eval_vars_dict['total_loss']), + 'object_loss': str(eval_vars_dict['object_loss']), + 'attribute_loss': str(eval_vars_dict['attribute_loss'])} + + if iter % self.log_every_n_iter==0 or is_last: + self.model_saver.save( + self.sess, + self.model_path, + global_step=iter) + + loss_path = os.path.join( + self.output_dir, + 'losses_' + str(iter) + '.json') + + with open(loss_path, 'w') as outfile: + ujson.dump( + self.loss_values, + outfile, + sort_keys=True, + indent=4) + + +def train( + batch_generator, + sess, + initializer, + vars_to_eval_dict, + feed_dict_creator, + logger): + + vars_to_eval_names = [] + vars_to_eval = [] + for var_name, var in vars_to_eval_dict.items(): + vars_to_eval_names += [var_name] + vars_to_eval += [var] + + with sess.as_default(): + initializer.initialize() + + iter = fine_tune_from_iter+1 + for batch in batch_generator: + print iter + feed_dict = feed_dict_creator(batch) + eval_vars = sess.run( + vars_to_eval, + feed_dict = feed_dict) + eval_vars_dict = { + var_name: eval_var for var_name, eval_var in + zip(vars_to_eval_names, eval_vars)} + logger.log(iter, False, eval_vars_dict) + iter+=1 + + logger.log(iter-1, True, eval_vars_dict) + +if __name__=='__main__': + print 'Creating batch generator...' + batch_generator = create_batch_generator() + print 'Creating computation graph...' + graph = graph_creator() + print 'Attaching optimizer...' + optimizer = attach_optimizer(graph) + print 'Starting a session...' + sess = tf.Session(graph=graph.tf_graph) + print 'Creating initializer...' + initializer = create_initializer(graph, sess) + print 'Creating feed dict creator...' + feed_dict_creator = create_feed_dict_creator(graph.plh) + print 'Creating dict of vars to be evaluated...' 
+ vars_to_eval_dict = { + 'object_prob': graph.obj_atr_inference.object_prob, + 'attribute_prob': graph.obj_atr_inference.attribute_prob, + 'total_loss': graph.total_loss, + 'object_loss': graph.object_loss, + 'attribute_loss': graph.attribute_loss, + 'optimizer_op': optimizer.ops['all_trainable_vars'] + } + print 'Creating logger...' + vars_to_save = graph.vars_to_save + logger = log_mgr( + vars_to_save, + sess, + log_every_n_iter, + output_dir, + model) + + print 'Start training...' + train( + batch_generator, + sess, + initializer, + vars_to_eval_dict, + feed_dict_creator, + logger) + diff --git a/object_attribute_classifier/inference.py b/object_attribute_classifier/inference.py index 2ce3910..f23ad34 100644 --- a/object_attribute_classifier/inference.py +++ b/object_attribute_classifier/inference.py @@ -3,26 +3,92 @@ import pdb import resnet.inference as resnet_inference from tftools import var_collect, placeholder_management, layers import constants +from word2vec.word_vector_management import word_vector_manager +import losses import tensorflow as tf + class ObjectAttributeInference(): def __init__( self, image_regions, - wordvecs, + object_label_vectors, + attribute_label_vectors, training): self.image_regions = image_regions - self.wordvecs = wordvecs self.training = training - avg_pool_feat = resnet_inference.inference( + self.avg_pool_feat = resnet_inference.inference( self.image_regions, - self.training) - - object_feat = self.add_object_graph(avg_pool_feat) - attribute_feat = self.add_attribute_graph(avg_pool_feat) - pdb.set_trace() + self.training, + num_classes=None) + + + self.avg_pool_feat = layers.batch_norm( + self.avg_pool_feat, + tf.constant(self.training)) + + self.resnet_vars = self.get_resnet_vars() + + self.object_embed = self.add_object_graph(self.avg_pool_feat) + self.attribute_embed = self.add_attribute_graph(self.avg_pool_feat) + + self.object_label_embed = self.add_object_label_graph(object_label_vectors) + self.attribute_label_embed = self.add_attribute_label_graph(attribute_label_vectors) + + with tf.variable_scope('object_score_graph'): + self.object_scores = self.compute_cosine_similarity( + self.object_embed, + self.object_label_embed) + + self.object_scores_alpha = tf.get_variable( + 'object_alpha', + shape=[self.object_scores.get_shape().as_list()[1]], + initializer=tf.constant_initializer()) + + self.object_scores_bias = tf.get_variable( + 'object_beta', + shape=[self.object_scores.get_shape().as_list()[1]], + initializer=tf.constant_initializer()) + + self.object_scores = \ + self.object_scores_alpha * self.object_scores + \ + self.object_scores_bias + + self.object_prob = tf.nn.softmax( + self.object_scores, + name = 'object_prob') + + with tf.variable_scope('attribute_score_graph'): + self.attribute_scores = self.compute_cosine_similarity( + self.attribute_embed, + self.attribute_label_embed) + + self.attribute_scores_alpha = tf.get_variable( + 'attribute_alpha', + shape=[self.attribute_scores.get_shape().as_list()[1]], + initializer=tf.constant_initializer()) + + self.attribute_scores_bias = tf.get_variable( + 'attribute_beta', + shape=[self.attribute_scores.get_shape().as_list()[1]], + initializer=tf.constant_initializer()) + + self.attribute_scores = \ + self.attribute_scores_alpha * self.attribute_scores + \ + self.attribute_scores_bias + + self.attribute_prob = tf.sigmoid( + self.attribute_scores, + name = 'attribute_prob') + + def get_resnet_vars(self): + vars_resnet = [] + for s in xrange(5): + vars_resnet += 
var_collect.collect_scope('scale'+str(s+1)) + + return vars_resnet def add_object_graph(self, input): with tf.variable_scope('object_graph') as object_graph: @@ -45,7 +111,8 @@ class ObjectAttributeInference(): fc2_out = layers.full( fc1_out, out_dim, - 'fc') + 'fc', + func = None) return fc2_out @@ -70,25 +137,106 @@ class ObjectAttributeInference(): fc2_out = layers.full( fc1_out, out_dim, - 'fc') + 'fc', + func = None) return fc2_out + def add_object_label_graph(self, input): + with tf.variable_scope('object_label_graph'): + out_dim = self.object_embed.get_shape().as_list()[-1] + with tf.variable_scope('fc1') as fc1: + in_dim = input.get_shape().as_list()[-1] + fc1_out = layers.full( + input, + out_dim, + 'fc', + func = None) + fc1_out = layers.batch_norm( + fc1_out, + tf.constant(self.training)) + fc1_out = tf.nn.relu(fc1_out) + + with tf.variable_scope('fc2') as fc2: + in_dim = fc1_out.get_shape().as_list()[-1] + fc2_out = layers.full( + fc1_out, + out_dim, + 'fc', + func = None) + return fc2_out + + def add_attribute_label_graph(self, input): + with tf.variable_scope('attribute_label_graph'): + out_dim = self.attribute_embed.get_shape().as_list()[-1] + with tf.variable_scope('fc1') as fc1: + in_dim = input.get_shape().as_list()[-1] + fc1_out = layers.full( + input, + out_dim, + 'fc', + func = None) + fc1_out = layers.batch_norm( + fc1_out, + tf.constant(self.training)) + fc1_out = tf.nn.relu(fc1_out) + + with tf.variable_scope('fc2') as fc2: + in_dim = fc1_out.get_shape().as_list()[-1] + fc2_out = layers.full( + fc1_out, + out_dim, + 'fc', + func = None) + return fc2_out + + def compute_cosine_similarity(self, feat1, feat2): + feat1 = tf.nn.l2_normalize(feat1, 1) + feat2 = tf.nn.l2_normalize(feat2, 1) + return tf.matmul(feat1, tf.transpose(feat2), name='cosine_similarity') + + def compute_dot_product(self, feat1, feat2): + return tf.matmul(feat1, tf.transpose(feat2), name='dot_product') if __name__=='__main__': im_h, im_w = constants.image_size + plh = placeholder_management.PlaceholderManager() plh.add_placeholder( name = 'image_regions', dtype = tf.float32, shape = [None, im_h, im_w, 3]) - + plh.add_placeholder( + name = 'object_labels', + dtype = tf.float32, + shape = [None, constants.num_object_labels]) + plh.add_placeholder( + name = 'attribute_labels', + dtype = tf.float32, + shape = [None, constants.num_attribute_labels]) + + word_vec_mgr = word_vector_manager() + training = False - ObjectAttributeInference( + obj_atr_inference = ObjectAttributeInference( plh['image_regions'], - [], + word_vec_mgr.object_label_vectors, + word_vec_mgr.attribute_label_vectors, training) + object_loss = losses.object_loss( + obj_atr_inference.object_scores, + plh['object_labels']) + + attribute_loss = losses.attribute_loss( + obj_atr_inference.attribute_scores, + plh['attribute_labels']) + + vars_to_regularize = tf.get_collection('to_regularize') + var_collect.print_var_list(vars_to_regularize, 'to_regularize') + + + diff --git a/object_attribute_classifier/train.py b/object_attribute_classifier/train.py new file mode 100644 index 0000000..9d9d98c --- /dev/null +++ b/object_attribute_classifier/train.py @@ -0,0 +1,323 @@ +import pdb +import os +import ujson +import numpy as np +import data.cropped_regions as cropped_regions + +import tftools.data +from tftools import var_collect, placeholder_management +from object_attribute_classifier import inference +from word2vec.word_vector_management import word_vector_manager + +import losses +import constants + +import tensorflow as tf + +tb_log_dir = 
constants.tb_log_dir + +batch_size = constants.region_batch_size +num_samples = constants.region_num_samples +num_epochs = constants.region_num_epochs +offset = constants.region_offset +queue_size = constants.region_queue_size + +im_h, im_w = constants.image_size +num_object_labels = constants.num_object_labels +num_attribute_labels = constants.num_attribute_labels + +regularization_coeff = constants.region_regularization_coeff +lr = constants.region_lr +log_every_n_iter = constants.region_log_every_n_iter + +output_dir = constants.region_output_dir + +model = constants.region_model +resnet_model = constants.resnet_ckpt + +class graph_creator(): + def __init__(self, training=True): + self.tf_graph = tf.Graph() + with self.tf_graph.as_default(): + self.create_placeholders() + self.word_vec_mgr = word_vector_manager() + self.obj_atr_inference = inference.ObjectAttributeInference( + self.plh['region_images'], + self.word_vec_mgr.object_label_vectors, + self.word_vec_mgr.attribute_label_vectors, + training) + self.add_losses() + self.vars_to_save = tf.all_variables() + self.merged = tf.merge_all_summaries() + self.writer = tf.train.SummaryWriter( + tb_log_dir, + graph = self.tf_graph) + + def create_placeholders(self): + self.plh = placeholder_management.PlaceholderManager() + + self.plh.add_placeholder( + 'region_images', + tf.float32, + shape=[None, im_h, im_w, 3]) + + self.plh.add_placeholder( + 'object_labels', + tf.float32, + shape=[None, num_object_labels]) + + self.plh.add_placeholder( + 'attribute_labels', + tf.float32, + shape=[None, num_attribute_labels]) + + def add_losses(self): + self.object_loss = losses.object_loss( + self.obj_atr_inference.object_scores, + self.plh['object_labels']) + + self.attribute_loss = losses.attribute_loss( + self.obj_atr_inference.attribute_scores, + self.plh['attribute_labels']) + + self.regularization_loss = self.regularization() + + self.total_loss = self.object_loss + \ + self.attribute_loss + \ + self.regularization_loss + total_loss_summary = tf.scalar_summary( + "total_loss", + self.total_loss) + + + def regularization(self): + vars_to_regularize = tf.get_collection('to_regularize') + loss = losses.regularization_loss( + vars_to_regularize, + regularization_coeff) + return loss + + +def create_initializer(graph, sess): + class initializer(): + def __init__(self): + with graph.tf_graph.as_default(): + resnet_vars = graph.obj_atr_inference.resnet_vars + resnet_restorer = tf.train.Saver(resnet_vars) + resnet_restorer.restore(sess, resnet_model) + not_to_init = resnet_vars + all_vars = tf.all_variables() + other_vars = [var for var in all_vars + if var not in not_to_init] + var_collect.print_var_list( + other_vars, + 'vars_to_init') + self.init = tf.initialize_variables(other_vars) + + def initialize(self): + sess.run(self.init) + + return initializer() + + +def create_batch_generator(): + data_mgr = cropped_regions.data( + constants.image_dir, + constants.object_labels_json, + constants.attribute_labels_json, + constants.regions_json, + constants.image_size, + channels=3, + mean_image_filename=None) + + index_generator = tftools.data.random( + batch_size, + num_samples, + num_epochs, + offset) + + batch_generator = tftools.data.async_batch_generator( + data_mgr, + index_generator, + queue_size) + + return batch_generator + + +def create_feed_dict_creator(plh): + def feed_dict_creator(batch): + inputs = { + 'region_images': batch['region_images'], + 'object_labels': batch['object_labels'], + 'attribute_labels': batch['attribute_labels'] + } + return 
plh.get_feed_dict(inputs) + + return feed_dict_creator + + +class attach_optimizer(): + def __init__(self, graph): + with graph.tf_graph.as_default(): + resnet_vars = graph.obj_atr_inference.resnet_vars + all_trainable_vars = tf.trainable_variables() + not_to_train = resnet_vars + \ + [graph.word_vec_mgr.word_vectors] + vars_to_train = [ + var for var in all_trainable_vars + if var not in not_to_train] + var_collect.print_var_list( + vars_to_train, + 'vars_to_train') + self.ops = dict() + self.add_adam_optimizer( + graph.total_loss, + vars_to_train, + 'all_but_resnet') + + def add_adam_optimizer(self, loss, var_list, name): + train_step = tf.train.AdamOptimizer(lr) \ + .minimize( + loss, + var_list = var_list) + self.ops[name] = train_step + + +class log_mgr(): + def __init__( + self, + graph, + vars_to_save, + sess, + log_every_n_iter, + output_dir, + model_path): + self.graph = graph + self.vars_to_save = vars_to_save + self.sess = sess + self.log_every_n_iter = log_every_n_iter + self.output_dir = output_dir + self.model_path = model_path + + self.model_saver = tf.train.Saver( + var_list = vars_to_save, + max_to_keep = 0) + + self.loss_values = dict() + + def log(self, iter, is_last=False, eval_vars_dict=None): + if eval_vars_dict: + self.graph.writer.add_summary( + eval_vars_dict['merged'], + iter) + print 'object' + print np.max(eval_vars_dict['object_prob'][0,:]) + print np.min(eval_vars_dict['object_prob'][0,:]) + print np.max(eval_vars_dict['object_scores'][0,:]) + print np.min(eval_vars_dict['object_scores'][0,:]) + print 'attribute' + print np.max(eval_vars_dict['attribute_prob'][0,:]) + print np.min(eval_vars_dict['attribute_prob'][0,:]) + print np.max(eval_vars_dict['attribute_scores'][0,:]) + print np.min(eval_vars_dict['attribute_scores'][0,:]) + self.loss_values[str(iter)] = { + 'total_loss': str(eval_vars_dict['total_loss']), + 'object_loss': str(eval_vars_dict['object_loss']), + 'attribute_loss': str(eval_vars_dict['attribute_loss'])} + + if iter % self.log_every_n_iter==0 or is_last: + self.model_saver.save( + self.sess, + self.model_path, + global_step=iter) + + loss_path = os.path.join( + self.output_dir, + 'losses_' + str(iter) + '.json') + + with open(loss_path, 'w') as outfile: + ujson.dump( + self.loss_values, + outfile, + sort_keys=True, + indent=4) + + +def train( + batch_generator, + sess, + initializer, + vars_to_eval_dict, + feed_dict_creator, + logger): + + vars_to_eval_names = [] + vars_to_eval = [] + for var_name, var in vars_to_eval_dict.items(): + vars_to_eval_names += [var_name] + vars_to_eval += [var] + + with sess.as_default(): + initializer.initialize() + + iter = 0 + for batch in batch_generator: + print iter + feed_dict = feed_dict_creator(batch) + eval_vars = sess.run( + vars_to_eval, + feed_dict = feed_dict) + eval_vars_dict = { + var_name: eval_var for var_name, eval_var in + zip(vars_to_eval_names, eval_vars)} + logger.log(iter, False, eval_vars_dict) + iter+=1 + + logger.log(iter-1, True, eval_vars_dict) + +if __name__=='__main__': + print 'Creating batch generator...' + batch_generator = create_batch_generator() + print 'Creating computation graph...' + graph = graph_creator() + print 'Attaching optimizer...' + optimizer = attach_optimizer(graph) + print 'Starting a session...' + sess = tf.Session(graph=graph.tf_graph) + print 'Creating initializer...' + initializer = create_initializer(graph, sess) + print 'Creating feed dict creator...' 
+ feed_dict_creator = create_feed_dict_creator(graph.plh) + print 'Creating dict of vars to be evaluated...' + vars_to_eval_dict = { + 'object_prob': graph.obj_atr_inference.object_prob, + 'object_scores': graph.obj_atr_inference.object_scores, + 'attribute_prob': graph.obj_atr_inference.attribute_prob, + 'attribute_scores': graph.obj_atr_inference.attribute_scores, + 'attribute_embed': graph.obj_atr_inference.attribute_embed, + 'avg_pool_feat': graph.obj_atr_inference.avg_pool_feat, + 'total_loss': graph.total_loss, + 'object_loss': graph.object_loss, + 'attribute_loss': graph.attribute_loss, + 'optimizer_op': optimizer.ops['all_but_resnet'], + 'merged': graph.merged, + } + print 'Creating logger...' + vars_to_save = graph.vars_to_save + logger = log_mgr( + graph, + vars_to_save, + sess, + log_every_n_iter, + output_dir, + model) + + print 'Start training...' + train( + batch_generator, + sess, + initializer, + vars_to_eval_dict, + feed_dict_creator, + logger) + diff --git a/resnet/inference.py b/resnet/inference.py index 012a6d9..62d3e30 100644 --- a/resnet/inference.py +++ b/resnet/inference.py @@ -78,10 +78,10 @@ def inference(x, is_training, # post-net x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool") - - # if num_classes != None: - # with tf.variable_scope('fc'): - # x = fc(x, c) + + if num_classes != None: + with tf.variable_scope('fc'): + x = fc(x, c) return x diff --git a/test_resnet_inference.py b/test_resnet_inference.py index 763ecbe..6252e87 100644 --- a/test_resnet_inference.py +++ b/test_resnet_inference.py @@ -27,7 +27,7 @@ if __name__=='__main__': 'Resnet/tensorflow-resnet-pretrained-20160509' ckpt_filename = os.path.join(model_dir, 'ResNet-L50.ckpt') - img = image_io.imread("/home/tanmay/Code/GenVQA/GenVQA/resnet/dalmatian.jpg") + img = image_io.imread("/home/tanmay/Code/GenVQA/GenVQA/resnet/schooner.jpg") img = image_io.imresize(img, output_size=(224,224)) img = img.astype(np.float32) diff --git a/tftools/var_collect.py b/tftools/var_collect.py index 3900633..9a0601d 100644 --- a/tftools/var_collect.py +++ b/tftools/var_collect.py @@ -2,7 +2,7 @@ import tensorflow as tf def print_var_list(var_list, name='Variables'): - print name + ': \n' + '[' + ', '.join([var.name for var in var_list]) + ']' + print name + ': \n' + '[' + ',\n '.join([var.name for var in var_list]) + ']' def collect_name(var_name, graph=None): diff --git a/word2vec/get_vocab_word_vectors.py b/word2vec/get_vocab_word_vectors.py index 1539b21..6d14251 100644 --- a/word2vec/get_vocab_word_vectors.py +++ b/word2vec/get_vocab_word_vectors.py @@ -1,6 +1,6 @@ from gensim.models import word2vec import numpy as np -import json +import ujson import pdb import constants @@ -16,7 +16,7 @@ def get_vocab_word_vectors( vocab_word_vectors = 2*np.random.rand( vocab_size, constants.word_vector_size) - vocab_word_vectors -= 0.5 + vocab_word_vectors -= 1.0 found_word_vec = 0 for word, index in vocab.items(): @@ -25,7 +25,7 @@ def get_vocab_word_vectors( vocab_word_vectors[index,:] = model[word] np.save( - constants.vocab_word_vectors_npy, + constants.pretrained_vocab_word_vectors_npy, vocab_word_vectors) print 'Found word vectors for {} out of {} words'.format( @@ -39,7 +39,9 @@ if __name__=='__main__': binary=True) with open(constants.vocab_json, 'r') as file: - vocab = json.load(file) + vocab = ujson.load(file) get_vocab_word_vectors(model, vocab) + + diff --git a/word2vec/word_vector_management.py b/word2vec/word_vector_management.py new file mode 100644 index 0000000..799231b --- /dev/null +++ 
b/word2vec/word_vector_management.py @@ -0,0 +1,107 @@ +import numpy as np +import pdb +import json + +from tftools import var_collect, placeholder_management, layers +import constants + +import tensorflow as tf + + +class word_vector_manager(): + def __init__(self): + self.vocab_word_vectors = np.load( + constants.pretrained_vocab_word_vectors_npy) + self.vocab_word_vectors = self.vocab_word_vectors.astype(np.float32) + self.vocab_size = self.vocab_word_vectors.shape[0] + + self.read_object_labels() + self.read_attribute_labels() + self.read_vocab() + + with tf.variable_scope('word_vectors') as word_vectors: + self.init_word_vector_tensor() + self.normalized_word_vectors = tf.nn.l2_normalize( + self.word_vectors, 1) + + with tf.variable_scope('object_label_word_vectors'): + self.object_label_word_vectors() + + with tf.variable_scope('attribute_label_word_vectors'): + self.attribute_label_word_vectors() + + + def init_word_vector_tensor(self): + self.word_vectors = tf.get_variable( + name = 'word_vector', + shape = [self.vocab_size, constants.word_vector_size], + initializer = tf.constant_initializer(self.vocab_word_vectors)) + + tf.add_to_collection('to_regularize', self.word_vectors) + + def read_object_labels(self): + with open(constants.object_labels_json, 'r') as file: + self.object_labels = json.load(file) + + def read_attribute_labels(self): + with open(constants.attribute_labels_json, 'r') as file: + self.attribute_labels = json.load(file) + + def read_vocab(self): + with open(constants.vocab_json, 'r') as file: + self.vocab = json.load(file) + + def create_phrase_word_vectors(self, phrase, scope_name): + with tf.variable_scope(scope_name) as phrase_graph: + words = phrase.split(" ") + ids = [] + for word in words: + if word in self.vocab: + ids += [self.vocab[word]] + else: + ids += [self.vocab[constants.unknown_token]] + + phrase_word_vector = tf.nn.embedding_lookup( + self.normalized_word_vectors, + tf.constant(ids, dtype=tf.int64), + name = 'embedding_lookup') + + phrase_word_vector = tf.reduce_mean( + phrase_word_vector, + 0, + True, + 'reduce_mean') + + return phrase_word_vector + + def object_label_word_vectors(self): + inv_object_labels = {v: k for k, v in self.object_labels.items()} + num_object_labels = len(inv_object_labels) + + object_label_vector_list = [None]*num_object_labels + for i in xrange(num_object_labels): + object_label_vector_list[i] = self.create_phrase_word_vectors( + inv_object_labels[i], + 'object_label_' + str(i)) + + self.object_label_vectors = tf.concat( + 0, object_label_vector_list) + + def attribute_label_word_vectors(self): + inv_attribute_labels = {v: k for k, v in self.attribute_labels.items()} + num_attribute_labels = len(inv_attribute_labels) + + attribute_label_vector_list = [None]*num_attribute_labels + for i in xrange(num_attribute_labels): + attribute_label_vector_list[i] = self.create_phrase_word_vectors( + inv_attribute_labels[i], + 'attribute_label_' + str(i)) + + self.attribute_label_vectors = tf.concat( + 0, attribute_label_vector_list) + +if __name__=='__main__': + word_vector_mgr = word_vector_manager() + + + -- GitLab
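For reference, a minimal NumPy sketch of the scoring scheme this patch adds in object_attribute_classifier/inference.py: region embeddings are compared to per-label embeddings by cosine similarity, the similarities are scaled and shifted by learned per-label parameters (alpha, beta), and the result goes through a softmax over object labels or an elementwise sigmoid over attribute labels. This is an illustrative reimplementation only, not code from the patch; the helper names, tiny label counts, and values are made up, and in the patch alpha and beta are zero-initialized and learned during training.

import numpy as np

def l2_normalize(x, axis=1, eps=1e-12):
    # Normalize rows to unit length, guarding against division by zero.
    norm = np.linalg.norm(x, axis=axis, keepdims=True)
    return x / np.maximum(norm, eps)

def label_scores(region_embed, label_embed, alpha, beta):
    # Cosine similarity between each region embedding and each label embedding,
    # followed by a per-label affine transform: score = alpha * cos + beta.
    cos = np.dot(l2_normalize(region_embed), l2_normalize(label_embed).T)
    return alpha * cos + beta

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    batch, embed_dim = 4, 8
    num_objects, num_attributes = 5, 6   # tiny stand-ins for the 1000/1000 labels

    region_embed = rng.randn(batch, embed_dim)
    object_label_embed = rng.randn(num_objects, embed_dim)
    attribute_label_embed = rng.randn(num_attributes, embed_dim)

    # Ones/zeros here only keep the example nontrivial; the patch zero-initializes both.
    obj_alpha, obj_beta = np.ones(num_objects), np.zeros(num_objects)
    atr_alpha, atr_beta = np.ones(num_attributes), np.zeros(num_attributes)

    object_prob = softmax(label_scores(region_embed, object_label_embed, obj_alpha, obj_beta))
    attribute_prob = sigmoid(label_scores(region_embed, attribute_label_embed, atr_alpha, atr_beta))

    print(object_prob.shape)      # (4, 5)
    print(attribute_prob.shape)   # (4, 6)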
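Similarly, a short sketch of the phrase-embedding step added in word2vec/word_vector_management.py: each label phrase is split into words, every word is mapped to a vocabulary index (falling back to the unknown token), its L2-normalized word vector is looked up, and the vectors are averaged into one label vector. The vocabulary, vectors, and the '_UNK_' token below are made-up stand-ins for constants.vocab_json, the pretrained vector matrix, and constants.unknown_token.

import numpy as np

def phrase_vector(phrase, vocab, word_vectors, unknown_token='_UNK_'):
    # vocab: word -> row index into word_vectors; word_vectors: [vocab_size, dim]
    ids = [vocab.get(w, vocab[unknown_token]) for w in phrase.split(' ')]
    vecs = word_vectors[ids]
    vecs = vecs / np.maximum(np.linalg.norm(vecs, axis=1, keepdims=True), 1e-12)
    return vecs.mean(axis=0)

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    vocab = {'_UNK_': 0, 'red': 1, 'fire': 2, 'hydrant': 3}
    word_vectors = rng.randn(len(vocab), 300).astype(np.float32)
    print(phrase_vector('fire hydrant', vocab, word_vectors).shape)   # (300,)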