diff --git a/answer_classifier/__init__.py b/answer_classifier/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/answer_classifier/inference.py b/answer_classifier/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..95c02880bb512198f1d63a0371ee4734cb7b72d3
--- /dev/null
+++ b/answer_classifier/inference.py
@@ -0,0 +1,82 @@
+import tensorflow as tf
+
+class AnswerInference():
+    def __init__(
+            self,
+            object_feat,
+            attribute_feat,
+            answer_region_scores,
+            question_bins,
+            answers,
+            space_dim):
+
+        self.object_feat = object_feat
+        self.attribute_feat = attribute_feat
+        self.answer_region_scores = answer_region_scores
+        self.question_bins = question_bins
+        self.answers = answers
+
+        self.question = self.concat_question()
+
+        self.question_proj = self.project_to_common_space(
+            self.question,
+            space_dim,
+            'question_projection')
+
+        self.answers_proj = self.project_to_common_space(
+            self.answers,
+            space_dim,
+            'answers_projection')
+
+        self.object_attribute = self.concat_object_attribute(
+            self.object_feat,
+            self.attribute_feat)
+
+        self.object_attribute_proj = self.project_to_common_space(
+            self.object_attribute,
+            space_dim,
+            'object_attribute_projection')
+
+        # Dot products between each region's visual features and the
+        # projected question / answer embeddings.
+        self.question_object_attribute_score = tf.matmul(
+            self.object_attribute_proj,
+            tf.transpose(self.question_proj))
+
+        self.answer_object_attribute_score = tf.matmul(
+            self.object_attribute_proj,
+            tf.transpose(self.answers_proj))
+
+        self.region_answer_score = \
+            self.question_object_attribute_score + \
+            self.answer_object_attribute_score
+
+        # Aggregate per-region answer scores weighted by region relevance.
+        self.answer_score = tf.matmul(
+            self.answer_region_scores,
+            self.region_answer_score)
+
+    def concat_question(self):
+        question = tf.concat(
+            1,
+            [self.question_bins['bin_0'], self.question_bins['bin_1'],
+             self.question_bins['bin_2'], self.question_bins['bin_3']])
+
+        return question
+
+    def concat_object_attribute(self, object_feat, attribute_feat):
+        object_attribute = tf.concat(
+            1,
+            [object_feat, attribute_feat])
+
+        return object_attribute
+
+    def project_to_common_space(self, x, space_dim, scope_name):
+        with tf.variable_scope(scope_name):
+            in_dim = x.get_shape().as_list()[-1]
+            W = tf.get_variable(
+                'W',
+                [in_dim, space_dim])
+            projected_x = tf.matmul(x, W)
+
+        return projected_x
diff --git a/answer_classifier/train.py b/answer_classifier/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8523519c49dcbca7ebd9598c459565634879e16
--- /dev/null
+++ b/answer_classifier/train.py
@@ -0,0 +1,116 @@
+import tensorflow as tf
+
+from word2vec.word_vector_management import word_vector_manager
+import object_attribute_classifier.inference as feature_graph
+import region_relevance_network.inference as relevance_graph
+from tftools import var_collect, placeholder_management
+
+
+class graph_creator():
+    def __init__(
+            self,
+            tb_log_dir,
+            image_size,
+            num_neg_answers,
+            training=True):
+        self.im_h, self.im_w = image_size
+        self.num_neg_answers = num_neg_answers
+
+        self.create_placeholders()
+
+        self.word_vec_mgr = word_vector_manager()
+
+        self.answers_embed = self.get_answer_embeddings()
+
+        self.question_embed = self.get_question_embeddings()
+
+        self.obj_atr_inference = feature_graph.ObjectAttributeInference(
+            self.plh['region_images'],
+            self.word_vec_mgr.object_label_vectors,
+            self.word_vec_mgr.attribute_label_vectors,
+            training)
+
+        self.object_feat = self.obj_atr_inference.object_embed
+        self.attribute_feat = self.obj_atr_inference.attribute_embed
+
+        self.relevance_inference = relevance_graph.RegionRelevanceInference(
+            self.object_feat,
+            self.attribute_feat,
+            {'bin_0': self.plh['bin_0'], 'bin_1': self.plh['bin_1'],
+             'bin_2': self.plh['bin_2'], 'bin_3': self.plh['bin_3']},
+            self.answers_embed)
+
+    def create_placeholders(self):
+        self.plh = placeholder_management.PlaceholderManager()
+
+        self.plh.add_placeholder(
+            'region_images',
+            tf.float32,
+            shape=[None, self.im_h, self.im_w, 3])
+
+        self.plh_answers = dict()
+        for i in xrange(self.num_neg_answers):
+            answer_name = 'negative_answer_' + str(i)
+            self.plh.add_placeholder(
+                answer_name,
+                tf.int64,
+                shape=[None])
+            self.plh_answers[answer_name] = self.plh[answer_name]
+
+        self.plh.add_placeholder(
+            'positive_answer',
+            tf.int64,
+            shape=[None])
+        self.plh_answers['positive_answer'] = self.plh['positive_answer']
+
+        self.plh_question_bins = dict()
+        for i in xrange(4):
+            bin_name = 'bin_' + str(i)
+            self.plh.add_placeholder(
+                bin_name,
+                tf.int64,
+                shape=[None])
+            self.plh_question_bins[bin_name] = self.plh[bin_name]
+
+    def get_question_embeddings(self):
+        with tf.variable_scope('question_bin_embed'):
+            question_bin_embed = dict()
+            for i in xrange(4):
+                bin_name = 'bin_' + str(i)
+                question_bin_embed[bin_name] = self.lookup_word_embeddings(
+                    self.plh[bin_name],
+                    bin_name)
+
+        return question_bin_embed
+
+    def get_answer_embeddings(self):
+        with tf.variable_scope('answers_embed'):
+            answers_embed = dict()
+            for i in xrange(self.num_neg_answers):
+                answer_name = 'negative_answer_' + str(i)
+                answers_embed[answer_name] = self.lookup_word_embeddings(
+                    self.plh[answer_name],
+                    answer_name)
+
+            answer_name = 'positive_answer'
+            answers_embed[answer_name] = self.lookup_word_embeddings(
+                self.plh[answer_name],
+                answer_name)
+
+        return answers_embed
+
+    def lookup_word_embeddings(self, index_list, name):
+        with tf.variable_scope(name):
+            word_vectors = tf.nn.embedding_lookup(
+                self.word_vec_mgr.normalized_word_vectors,
+                index_list,
+                name='embedding_lookup')
+
+            # Average the word vectors to get a single embedding per phrase.
+            embedding = tf.reduce_mean(
+                word_vectors,
+                0,
+                True,
+                'reduce_mean')
+
+        return embedding
diff --git a/constants.py b/constants.py
index 84209f5591c8d3bad1755d3197a6bb589d2a1210..c9745269a91e5ae32db524cc665c4eb528d5b4c9 100644
--- a/constants.py
+++ b/constants.py
@@ -5,7 +5,7 @@ def mkdir_if_not_exists(dir_name):
     if not os.path.exists(dir_name):
         os.mkdir(dir_name)
 
-experiment_name = 'object_attribute_with_compensation_without_word_vector_classification'
+experiment_name = 'object_attribute_with_compensation_with_word_vector_prediction_high_lr'
 
 # Global output directory (all subexperiments will be saved here)
 global_output_dir = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome'
@@ -101,12 +101,12 @@ region_model = os.path.join(
     region_output_dir,
     'model')
 
-region_fine_tune_from_iter = 66500
+region_fine_tune_from_iter = 20500
 region_fine_tune_from = region_model + '-' + str(region_fine_tune_from_iter)
 
 # Object Attribute Classifier Evaluation Params
 region_eval_on = 'val' # One of {'val','test','train'}
-region_model_to_eval = region_model + '-' + '66500'
+region_model_to_eval = region_model + '-' + '58000'
 
 region_attribute_scores_dirname = os.path.join(
     region_output_dir,
@@ -116,5 +116,18 @@ mkdir_if_not_exists(region_attribute_scores_dirname)
 
 # Answer prediction
 num_region_proposals = 100
-num_mcq_candidate = 18
+num_mcq_candidates = 18
+num_negative_answers = num_mcq_candidates - 1
+
+# VQA data paths
+vqa_basedir = '/home/ssd/VQA/'
+vqa_train_image_dir = os.path.join( + vqa_basedir, + 'train2014_cropped') +vqa_train_anno = os.path.join( + vqa_basedir, + 'mscoco_train2014_annotations_with_parsed_questions.json') +vqa_answer_vocab_json = os.path.join( + vqa_basedir, + 'answer_vocab.json') diff --git a/data/#vqa.py# b/data/#vqa.py# deleted file mode 100644 index 130feff3663a6d51fe8aff6835482867a351ab7d..0000000000000000000000000000000000000000 --- a/data/#vqa.py# +++ /dev/null @@ -1,224 +0,0 @@ -import numpy as np -import ujson -import os -import pdb -import time -import threading - -import tftools.data -import image_io -import constants - -import tensorflow as tf - -_unknown_token = constants.unknown_token - -def unwrap_self_get_single(arg, **kwarg): - return data.get_single(*arg, **kwarg) - -class data(): - def __init__(self, - image_dir, - anno_json, - vocab_json, - image_size, - num_region_proposals, - num_ans_candidates, - channels=3, - mean_image_filename=None): - self.image_dir = image_dir - self.h = image_size[0] - self.w = image_size[1] - self.c = channels - self.num_region_proposals = num_region_proposals - self.num_ans_candidates = num_ans_candidates - self.anno = self.read_json_file(anno_json) - self.vocab = self.read_json_file(vocab_json) - self.inv_vocab = self.invert_label_dict(self.vocab) - self.num_questions = len(self.anno) - self.create_sample_to_question_dict() - - def create_sample_to_question_dict(self): - self.sample_to_question_dict = \ - {k: v for k, v in zip(xrange(self.num_questions), - self.anno.keys())} - - def invert_label_dict(self, label_dict): - return {v: k for k, v in label_dict.items()} - - def read_json_file(self, filename): - print 'Reading {} ...'.format(filename) - with open(filename, 'r') as file: - return ujson.load(file) - - def get_single_image(self, sample, region_number, batch_list, worker_id): - try: - batch = dict() - question_id = self.sample_to_question_dict[sample] - region_image, read_success = self.get_region_image( - sample, - region_number) - if not read_success: - region_image = np.zeros( - [self.h, self.w, self.c], np.float32) - - batch_list[worker_id] = region_image - - except Exception, e: - print 'Error in thread {}: {}'.format( - threading.current_thread().name, str(e)) - - def get_parallel(self, samples): - batch_list = [None]*len(samples)*self.num_region_proposals - worker_ids = range(len(samples)*self.num_region_proposals) - workers = [] - for count, sample in enumerate(samples): - for i in xrange(self.num_region_proposals): - worker = threading.Thread( - target = self.get_single_image, - args = ( - sample, - i, - batch_list, - worker_ids[count*self.num_region_proposals + i])) - worker.setDaemon(True) - worker.start() - workers.append(worker) - - for worker in workers: - worker.join() - - batch_size = len(samples) - batch = dict() - batch['region_ids'] = dict() - batch['region_images'] = np.zeros( - [batch_size*self.num_region_proposals, self.h, self.w, self.c], - np.float32) - batch['candidate_ans_labels'] = - batch['answer_labels']= np.zeros( - [batch_size, self.num_ans_candidates], np.float32) - batch['answer_margins'] = np.zeros( - [batch_size, self.num_ans_candidates], np.float32) - - for index, single_batch in enumerate(batch_list): - batch['region_ids'][index] = single_batch['region_id'] - batch['region_images'][index, :, :, :] = single_batch['region_image'] - batch['object_labels'][index, :] = single_batch['object_label'] - batch['attribute_labels'][index,:] = single_batch['attribute_label'] - - return batch - - def get_region_image(self, sample, 
region_number): - question_id = self.sample_to_question_dict[sample] - image_id = self.anno[str(question_id)]['image_id'] - image_subdir = os.path.join( - self.image_dir, - self.image_prefix) - - filename = os.path.join(image_subdir, - str(region_number) + '.jpg') - read_success = True - try: - region_image = image_io.imread(filename) - region_image = region_image.astype(np.float32) - except: - print 'Could not read image: Setting the image pixels to 0s' - read_success = False - region_image = np.zeros([self.h, self.w, 3], dtype=np.float32) - - return region_image, read_success - - def get_object_label(self, sample): - # Returns a multihot vector encoding of object labels - # If an object label is not found in the labels list, - # _unknown_token is produced in that case. - region_id = self.sample_to_region_dict[sample] - region = self.regions[region_id] - object_labels = region['object_names'] - object_label_encoding = np.zeros([1, self.num_object_labels], - dtype = np.float32) - if object_labels: - for object in object_labels: - if object not in self.object_labels_dict: - label_id = self.object_labels_dict[_unknown_token] - else: - label_id = self.object_labels_dict[object] - object_label_encoding[0,label_id] = 1.0 - else: - label_id = self.object_labels_dict[_unknown_token] - object_label_encoding[0,label_id] = 1.0 - - return object_label_encoding/np.sum(object_label_encoding), object_labels - - def get_attribute_label(self, sample): - # Attribute is turned on if it is present - region_id = self.sample_to_region_dict[sample] - region = self.regions[region_id] - attribute_labels = region['attributes'] - attribute_label_encoding = np.zeros([1, self.num_attribute_labels], - dtype = np.float32) - for attribute in attribute_labels: - if attribute in self.attribute_labels_dict: - label_id = self.attribute_labels_dict[attribute] - attribute_label_encoding[0,label_id] = 1.0 - - return attribute_label_encoding, attribute_labels - -if __name__=='__main__': - data_mgr = data(constants.image_dir, - constants.object_labels_json, - constants.attribute_labels_json, - constants.regions_json, - constants.image_size, - channels=3, - mean_image_filename=None) - print 'Number of object labels: {}'.format(data_mgr.num_object_labels) - print 'Number of attribute labels: {}'.format(data_mgr.num_attribute_labels) - print 'Number of regions: {}'.format(data_mgr.num_regions) - - #Test sample - samples = [1, 2] - sample = samples[0] - region_id = data_mgr.sample_to_region_dict[sample] - region = data_mgr.regions[region_id] - attribute_encoding = data_mgr.get_attribute_label(sample) - object_encoding = data_mgr.get_object_label(sample) - region_image = data_mgr.get_region_image(sample) - - attributes = [] - for i in xrange(attribute_encoding.shape[1]): - if attribute_encoding[0,i] > 0 : - attributes.append(data_mgr.inv_attribute_labels_dict[i]) - - objects = [] - for i in xrange(object_encoding.shape[1]): - if object_encoding[0,i] > 0 : - objects.append(data_mgr.inv_object_labels_dict[i]) - - print "Region: {}".format(region) - print "Attributes: {}".format(", ".join(attributes)) - print "Objects: {}".format(", ".join(objects)) - - batch_size = 200 - num_samples = 200 - num_epochs = 1 - offset = 0 - queue_size = 100 - - index_generator = tftools.data.random( - batch_size, - num_samples, - num_epochs, - offset) - - batch_generator = tftools.data.async_batch_generator( - data_mgr, - index_generator, - queue_size) - - count = 0 - for batch in batch_generator: - print 'Batch Number: {}'.format(count) - count += 1 - - 
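Editor's note (illustrative, not part of the diff): the rewritten data/vqa.py below expects each annotation to carry a 'parsed_question' dict mapping the four bins (bin_0 ... bin_3) to word strings, which get_question encodes with encode_sentence. A hypothetical annotation entry, with made-up values:

    anno['409380'] = {
        'image_id': 409380,
        'multiple_choice_answer': 'red',
        'multiple_choices': ['red', 'blue', 'green', 'yellow'],
        'answers': [{'answer': 'red'}, {'answer': 'dark red'}],
        'parsed_question': {
            'bin_0': 'what color',  # hypothetical bin contents
            'bin_1': 'is',
            'bin_2': 'the plant',
            'bin_3': '',
        },
    }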
diff --git a/data/.#vqa.py b/data/.#vqa.py deleted file mode 120000 index 7c92c0f7a0a73d2ffbe0a3f7914381d843a140cc..0000000000000000000000000000000000000000 --- a/data/.#vqa.py +++ /dev/null @@ -1 +0,0 @@ -tanmay@crunchy.26789:1465923137 \ No newline at end of file diff --git a/data/vqa.py b/data/vqa.py index 746f5ce30e254628fde1dfba93652093be7a7804..dae5ba9cad753e9795834ff42de07316f7273154 100644 --- a/data/vqa.py +++ b/data/vqa.py @@ -1,6 +1,7 @@ import numpy as np import ujson import os +import re import pdb import time import threading @@ -21,20 +22,25 @@ class data(): image_dir, anno_json, vocab_json, + ans_vocab_json, image_size, num_region_proposals, - num_ans_candidates, + num_neg_answers, channels=3, + mode='mcq', mean_image_filename=None): self.image_dir = image_dir self.h = image_size[0] self.w = image_size[1] self.c = channels - self.num_region_proposals = num_region_proposals, - self.num_ans_candidates = num_ans_candidates + self.mode = mode + self.num_region_proposals = num_region_proposals + self.num_neg_answers = num_neg_answers self.anno = self.read_json_file(anno_json) self.vocab = self.read_json_file(vocab_json) + self.ans_vocab = self.read_json_file(ans_vocab_json) self.inv_vocab = self.invert_label_dict(self.vocab) + self.inv_ans_vocab = self.invert_label_dict(self.ans_vocab) self.num_questions = len(self.anno) self.create_sample_to_question_dict() @@ -51,6 +57,45 @@ class data(): with open(filename, 'r') as file: return ujson.load(file) + def get_parallel(self, sample): + region_list = [None]*self.num_region_proposals + worker_ids = range(self.num_region_proposals) + workers = [] + for i in xrange(self.num_region_proposals): + worker = threading.Thread( + target = self.get_single_image, + args = ( + sample, + i, + region_list, + worker_ids[i])) + worker.setDaemon(True) + worker.start() + workers.append(worker) + + for worker in workers: + worker.join() + + batch_size = 1 + batch = dict() + batch['region_images'] = np.zeros( + [self.num_region_proposals, self.h, self.w, self.c], + np.float32) + batch['positive_answer']= [] + batch['negative_answers'] = [] + batch['question']= [] + for index, single_region in enumerate(region_list): + batch['region_images'][index, :, :, :] = single_region + + batch['question'].append(self.get_question(sample)) + batch['positive_answer'].append(self.get_positive_answer(sample, self.mode)) + + negative_answers = self.get_negative_answers(sample, self.mode) + for negative_answer in negative_answers: + batch['negative_answers'].append(negative_answer) + + return batch + def get_single_image(self, sample, region_number, batch_list, worker_id): try: batch = dict() @@ -58,6 +103,7 @@ class data(): region_image, read_success = self.get_region_image( sample, region_number) + if not read_success: region_image = np.zeros( [self.h, self.w, self.c], np.float32) @@ -68,159 +114,168 @@ class data(): print 'Error in thread {}: {}'.format( threading.current_thread().name, str(e)) - def get_parallel(self, samples): - batch_list = [None]*len(samples)*self.num_region_proposals - worker_ids = range(len(samples)*self.num_region_proposals) - workers = [] - for count, sample in enumerate(samples): - for i in xrange(self.num_region_proposals): - worker = threading.Thread( - target = self.get_single_image, - args = ( - sample, - i, - batch_list, - worker_ids[count*self.num_region_proposals + i])) - worker.setDaemon(True) - worker.start() - workers.append(worker) - - for worker in workers: - worker.join() - - batch_size = len(samples) - batch = dict() - 
batch['region_ids'] = dict()
-        batch['region_images'] = np.zeros(
-            [batch_size*self.num_region_proposals, self.h, self.w, self.c],
-            np.float32)
-        if self.mcq:
-            batch['candidate_ans_labels'] = np.zeros(
-                [batch_size, ]
-        batch['object_labels']= np.zeros(
-            [batch_size, len(self.object_labels_dict)], np.float32)
-        batch['attribute_labels'] = np.zeros(
-            [batch_size, len(self.attribute_labels_dict)], np.float32)
-
-        for index, single_batch in enumerate(batch_list):
-            batch['region_ids'][index] = single_batch['region_id']
-            batch['region_images'][index, :, :, :] = single_batch['region_image']
-            batch['object_labels'][index, :] = single_batch['object_label']
-            batch['attribute_labels'][index,:] = single_batch['attribute_label']
-
-        return batch
-
     def get_region_image(self, sample, region_number):
         question_id = self.sample_to_question_dict[sample]
         image_id = self.anno[str(question_id)]['image_id']
         image_subdir = os.path.join(
             self.image_dir,
-            self.image_prefix)
+            'COCO_train2014_' + str(image_id).zfill(12))
 
         filename = os.path.join(image_subdir,
-                                str(region_number) + '.jpg')
+                                str(region_number+1) + '.jpg')
         read_success = True
         try:
             region_image = image_io.imread(filename)
             region_image = region_image.astype(np.float32)
         except:
-            print 'Could not read image: Setting the image pixels to 0s'
+            print 'Could not read image {}: Setting the image pixels to 0s'.format(
+                filename)
             read_success = False
             region_image = np.zeros([self.h, self.w, 3], dtype=np.float32)
 
         return region_image, read_success
+
+    def get_question(self, sample):
+        question_id = self.sample_to_question_dict[sample]
+        parsed_question = self.anno[question_id]['parsed_question']
+        print parsed_question
+        encoded_parsed_question = dict()
+        for bin, words in parsed_question.items():
+            encoded_parsed_question[bin] = self.encode_sentence(words)
+        return encoded_parsed_question
+
+    def get_positive_answer(self, sample, mode='mcq'):
+        question_id = self.sample_to_question_dict[sample]
+        if mode=='mcq':
+            positive_answer = self.anno[question_id]['multiple_choice_answer'].lower()
+            print positive_answer
+            return self.encode_sentence(positive_answer)
+
+        # Otherwise pick the most frequently annotated free-form answer.
+        answers = self.anno[question_id]['answers']
+        answer_counts = dict()
+        for answer in answers:
+            answer_lower = answer['answer'].lower()
+            if answer_lower not in answer_counts:
+                answer_counts[answer_lower] = 1
+            else:
+                answer_counts[answer_lower] += 1
+
+        popular_answer = ''
+        current_count = 0
+        for answer, count in answer_counts.items():
+            if count > current_count:
+                popular_answer = answer
+                current_count = count
+
+        return self.encode_sentence(popular_answer)
 
-    def get_object_label(self, sample):
-        # Returns a multihot vector encoding of object labels
-        # If an object label is not found in the labels list,
-        # _unknown_token is produced in that case.
-        region_id = self.sample_to_region_dict[sample]
-        region = self.regions[region_id]
-        object_labels = region['object_names']
-        object_label_encoding = np.zeros([1, self.num_object_labels],
-                                         dtype = np.float32)
-        if object_labels:
-            for object in object_labels:
-                if object not in self.object_labels_dict:
-                    label_id = self.object_labels_dict[_unknown_token]
-                else:
-                    label_id = self.object_labels_dict[object]
-                object_label_encoding[0,label_id] = 1.0
+    def get_negative_answers(self, sample, mode='mcq'):
+        question_id = self.sample_to_question_dict[sample]
+        positive_answers = []
+        for answer in self.anno[question_id]['answers']:
+            positive_answers.append(answer['answer'].lower())
+
+        if mode=='mcq':
+            multiple_choices = self.anno[question_id]['multiple_choices']
+            remaining_answers = [
+                ans.lower() for ans in multiple_choices if ans.lower() not in positive_answers]
+            sampled_negative_answers = remaining_answers
+            print sampled_negative_answers
         else:
-            label_id = self.object_labels_dict[_unknown_token]
-            object_label_encoding[0,label_id] = 1.0
+            remaining_answers = [
+                ans.lower() for ans in self.ans_vocab.keys() if ans.lower() not in positive_answers]
+            sampled_negative_answers = np.random.choice(
+                remaining_answers,
+                size=self.num_neg_answers,
+                replace=False)
 
-        return object_label_encoding/np.sum(object_label_encoding), object_labels
+        encoded_answers = []
+        for answer in sampled_negative_answers:
+            encoded_answers.append(self.encode_sentence(answer))
 
-    def get_attribute_label(self, sample):
-        # Attribute is turned on if it is present
-        region_id = self.sample_to_region_dict[sample]
-        region = self.regions[region_id]
-        attribute_labels = region['attributes']
-        attribute_label_encoding = np.zeros([1, self.num_attribute_labels],
-                                            dtype = np.float32)
-        for attribute in attribute_labels:
-            if attribute in self.attribute_labels_dict:
-                label_id = self.attribute_labels_dict[attribute]
-                attribute_label_encoding[0,label_id] = 1.0
-
-        return attribute_label_encoding, attribute_labels
+        return encoded_answers
+
+    def encode_sentence(self, sentence):
+        # Split into words, keeping only characters and numbers
+        words = re.split(r'\W+', sentence.lower())
+
+        # Remove ''
+        words = [word for word in words if word!='']
+
+        # If no words are left, fall back to the unknown token
+        if not words:
+            words = [constants.unknown_token]
+
+        encoded_sentence = []
+        for word in words:
+            if word not in self.vocab:
+                word = constants.unknown_token
+            encoded_sentence.append(int(self.vocab[word]))
+
+        return encoded_sentence
+
 
 if __name__=='__main__':
-    data_mgr = data(constants.image_dir,
-                    constants.object_labels_json,
-                    constants.attribute_labels_json,
-                    constants.regions_json,
-                    constants.image_size,
-                    channels=3,
-                    mean_image_filename=None)
-    print 'Number of object labels: {}'.format(data_mgr.num_object_labels)
-    print 'Number of attribute labels: {}'.format(data_mgr.num_attribute_labels)
-    print 'Number of regions: {}'.format(data_mgr.num_regions)
-
-    #Test sample
-    samples = [1, 2]
-    sample = samples[0]
-    region_id = data_mgr.sample_to_region_dict[sample]
-    region = data_mgr.regions[region_id]
-    attribute_encoding = data_mgr.get_attribute_label(sample)
-    object_encoding = data_mgr.get_object_label(sample)
-    region_image = data_mgr.get_region_image(sample)
-
-    attributes = []
-    for i in xrange(attribute_encoding.shape[1]):
-        if attribute_encoding[0,i] > 0 :
-            attributes.append(data_mgr.inv_attribute_labels_dict[i])
-
-    objects = []
-    for i in xrange(object_encoding.shape[1]):
-        if object_encoding[0,i] > 0 :
-
objects.append(data_mgr.inv_object_labels_dict[i]) + data_mgr = data( + constants.vqa_train_image_dir, + constants.vqa_train_anno, + constants.vocab_json, + constants.vqa_answer_vocab_json, + constants.image_size, + constants.num_region_proposals, + constants.num_negative_answers) + + for sample in xrange(100): + print sample + batch = data_mgr.get_parallel(sample) - print "Region: {}".format(region) - print "Attributes: {}".format(", ".join(attributes)) - print "Objects: {}".format(", ".join(objects)) - - batch_size = 200 - num_samples = 200 - num_epochs = 1 - offset = 0 - queue_size = 100 - - index_generator = tftools.data.random( - batch_size, - num_samples, - num_epochs, - offset) + # print 'Number of object labels: {}'.format(data_mgr.num_object_labels) + # print 'Number of attribute labels: {}'.format(data_mgr.num_attribute_labels) + # print 'Number of regions: {}'.format(data_mgr.num_regions) + + # #Test sample + # samples = [1, 2] + # sample = samples[0] + # region_id = data_mgr.sample_to_region_dict[sample] + # region = data_mgr.regions[region_id] + # attribute_encoding = data_mgr.get_attribute_label(sample) + # object_encoding = data_mgr.get_object_label(sample) + # region_image = data_mgr.get_region_image(sample) + + # attributes = [] + # for i in xrange(attribute_encoding.shape[1]): + # if attribute_encoding[0,i] > 0 : + # attributes.append(data_mgr.inv_attribute_labels_dict[i]) + + # objects = [] + # for i in xrange(object_encoding.shape[1]): + # if object_encoding[0,i] > 0 : + # objects.append(data_mgr.inv_object_labels_dict[i]) + + # print "Region: {}".format(region) + # print "Attributes: {}".format(", ".join(attributes)) + # print "Objects: {}".format(", ".join(objects)) + + # batch_size = 200 + # num_samples = 200 + # num_epochs = 1 + # offset = 0 + # queue_size = 100 + + # index_generator = tftools.data.random( + # batch_size, + # num_samples, + # num_epochs, + # offset) - batch_generator = tftools.data.async_batch_generator( - data_mgr, - index_generator, - queue_size) - - count = 0 - for batch in batch_generator: - print 'Batch Number: {}'.format(count) - count += 1 + # batch_generator = tftools.data.async_batch_generator( + # data_mgr, + # index_generator, + # queue_size) + + # count = 0 + # for batch in batch_generator: + # print 'Batch Number: {}'.format(count) + # count += 1 diff --git a/object_attribute_classifier/compute_AP.py b/object_attribute_classifier/compute_AP.py index c0a4e042e1b667099357510c0f8f372ce9bac8ef..b1e9beb1c573ea5d2131d73392b04b58f18b03fd 100644 --- a/object_attribute_classifier/compute_AP.py +++ b/object_attribute_classifier/compute_AP.py @@ -9,7 +9,7 @@ if __name__=='__main__': num_pos_examples = [] for i in xrange(10): dirname = '/home/tanmay/Code/GenVQA/Exp_Results/VisualGenome/' + \ - 'object_attribute_with_compensation_without_word_vector_classification/' + \ + 'object_attribute_with_compensation_with_word_vector_prediction_high_lr/' + \ 'object_attribute_classifiers/attribute_scores/' labels_filename = os.path.join(dirname, 'labels_' + str(i) + '.json') @@ -57,7 +57,6 @@ if __name__=='__main__': plt.clf() - pdb.set_trace() plt.plot(xrange(10), num_pos_examples, '-') figname = os.path.join( diff --git a/object_attribute_classifier/eval.py b/object_attribute_classifier/eval.py index e683551bbd05f53f116a6ae7d95e9d94a5340ad7..b6e6a0244d28c7b823bde88868a8397fdd193c47 100644 --- a/object_attribute_classifier/eval.py +++ b/object_attribute_classifier/eval.py @@ -289,10 +289,10 @@ if __name__=='__main__': sess, constants.region_model_to_eval) - 
moving_mean = var_collect.collect_name('scale5/block1/a/moving_mean:0', graph.tf_graph)
-    with sess.as_default():
-        pdb.set_trace()
-        print moving_mean.eval()
+    # moving_mean = var_collect.collect_name('scale5/block1/a/moving_mean:0', graph.tf_graph)
+    # with sess.as_default():
+    #     pdb.set_trace()
+    #     print moving_mean.eval()
 
     print 'Creating feed dict creator...'
     feed_dict_creator = train.create_feed_dict_creator(graph.plh)
diff --git a/object_attribute_classifier/inference.py b/object_attribute_classifier/inference.py
index ed27b8f45fb8ce45e056d78dc7647def661370c5..84c22639d3afb2b2963ea3cfa2ec9fa4e3449562 100644
--- a/object_attribute_classifier/inference.py
+++ b/object_attribute_classifier/inference.py
@@ -18,13 +18,14 @@ class ObjectAttributeInference():
             training):
 
         self.image_regions = image_regions
+        self.object_label_vectors = object_label_vectors
+        self.attribute_label_vectors = attribute_label_vectors
         self.training = training
 
         self.avg_pool_feat = resnet_inference.inference(
             self.image_regions,
-            training,
+            True,
             num_classes=None)
-
         self.avg_pool_feat = layers.batch_norm(
             self.avg_pool_feat,
             tf.constant(self.training))
@@ -34,8 +35,8 @@ class ObjectAttributeInference():
         self.object_embed = self.add_object_graph(self.avg_pool_feat)
         self.attribute_embed = self.add_attribute_graph(self.avg_pool_feat)
 
-        self.object_label_embed = self.add_object_label_graph(object_label_vectors)
-        self.attribute_label_embed = self.add_attribute_label_graph(attribute_label_vectors)
+        # self.object_label_embed = self.add_object_label_graph(object_label_vectors)
+        # self.attribute_label_embed = self.add_attribute_label_graph(attribute_label_vectors)
 
         with tf.variable_scope('object_score_graph'):
             # Method 1
@@ -62,15 +63,26 @@ class ObjectAttributeInference():
             #     self.object_scores_bias
 
             # Method 2
-            out_dim = self.object_label_embed.get_shape().as_list()[0]
+            # out_dim = self.object_label_embed.get_shape().as_list()[0]
 
-            self.object_scores = layers.full(
-                tf.nn.relu(self.object_embed),
-                out_dim,
-                'object_fc',
-                func = None)
+            # self.object_scores = layers.full(
+            #     tf.nn.relu(self.object_embed),
+            #     out_dim,
+            #     'object_fc',
+            #     func = None)
+
+            # Method 3
+            self.object_scores = tf.matmul(
+                self.object_embed,
+                tf.transpose(self.object_label_vectors))
+
+            self.object_biases = tf.get_variable(
+                'object_biases',
+                shape = [1, self.object_scores.get_shape().as_list()[1]],
+                initializer = tf.constant_initializer(0.0))
+
+            self.object_scores += self.object_biases
-
             self.object_prob = tf.nn.softmax(
                 self.object_scores,
                 name = 'object_prob')
@@ -96,12 +108,24 @@ class ObjectAttributeInference():
             # Method 2
-            out_dim = self.attribute_label_embed.get_shape().as_list()[0]
-            self.attribute_scores = layers.full(
-                tf.nn.relu(self.attribute_embed),
-                out_dim,
-                'attribute_fc',
-                func = None)
+            # out_dim = self.attribute_label_embed.get_shape().as_list()[0]
+            # self.attribute_scores = layers.full(
+            #     tf.nn.relu(self.attribute_embed),
+            #     out_dim,
+            #     'attribute_fc',
+            #     func = None)
+
+            # Method 3
+            self.attribute_scores = tf.matmul(
+                self.attribute_embed,
+                tf.transpose(self.attribute_label_vectors))
+
+            self.attribute_biases = tf.get_variable(
+                'attribute_biases',
+                shape = [1, self.attribute_scores.get_shape().as_list()[1]],
+                initializer = tf.constant_initializer(0.0))
+
+            self.attribute_scores += self.attribute_biases
 
             self.attribute_prob = tf.sigmoid(
                 self.attribute_scores,
@@ -131,7 +155,7 @@ class ObjectAttributeInference():
 
         with tf.variable_scope('fc2') as fc2:
             in_dim = fc1_out.get_shape().as_list()[-1]
-            out_dim = in_dim/2
+            out_dim = self.object_label_vectors.get_shape().as_list()[-1]
             fc2_out = layers.full(
                 fc1_out,
                 out_dim,
@@ -157,7 +181,7 @@ class ObjectAttributeInference():
 
         with tf.variable_scope('fc2') as fc2:
             in_dim = fc1_out.get_shape().as_list()[-1]
-            out_dim = in_dim/2
+            out_dim = self.attribute_label_vectors.get_shape().as_list()[-1]
             fc2_out = layers.full(
                 fc1_out,
                 out_dim,
diff --git a/object_attribute_classifier/train.py b/object_attribute_classifier/train.py
index 5071542fef05dbda547f0a96e673e8fa4e3d3066..11a91f57516d0378ab64372b7bea576c33ba0150 100644
--- a/object_attribute_classifier/train.py
+++ b/object_attribute_classifier/train.py
@@ -36,7 +36,7 @@ class graph_creator():
             self.plh['region_images'],
             self.word_vec_mgr.object_label_vectors,
             self.word_vec_mgr.attribute_label_vectors,
-            True)
+            training)
         self.add_losses()
         self.add_accuracy_computation()
         self.vars_to_save = tf.all_variables()
@@ -218,7 +218,7 @@ class attach_optimizer():
         with graph.tf_graph.as_default():
             resnet_vars = graph.obj_atr_inference.resnet_vars
             all_trainable_vars = tf.trainable_variables()
-            self.not_to_train = [graph.word_vec_mgr.word_vectors]
+            self.not_to_train = []  # was: resnet_vars + [graph.word_vec_mgr.word_vectors]
             vars_to_train = [
                 var for var in all_trainable_vars
                 if var not in self.not_to_train]
diff --git a/question_parser/simple_parser.py b/question_parser/simple_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..f54380479e38850666abfeb0bfdb5c965d823763
--- /dev/null
+++ b/question_parser/simple_parser.py
@@ -0,0 +1,21 @@
+from nltk.parse.stanford import StanfordDependencyParser
+path_to_jar = '/home/tanmay/Code/GenVQA/GenVQA/question_parser/' + \
+    'stanford-parser-full-2015-12-09/stanford-parser.jar'
+path_to_models_jar = '/home/tanmay/Code/GenVQA/GenVQA/question_parser/' + \
+    'stanford-parser-full-2015-12-09/stanford-parser-3.6.0-models.jar'
+dependency_parser = StanfordDependencyParser(
+    path_to_jar=path_to_jar,
+    path_to_models_jar=path_to_models_jar)
+
+result = dependency_parser.raw_parse('Potted Plant')
+dep = result.next()
+triples = list(dep.triples())
+for triple in triples:
+    head = triple[0]
+    relation = triple[1]
+    dependent = triple[2]
+    print '{} : {}, {}'.format(
+        relation,
+        head,
+        dependent)
+
diff --git a/region_relevance_network/__init__.py b/region_relevance_network/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/region_relevance_network/inference.py b/region_relevance_network/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..81df1625b03eb66108b0d50c7f4ec5c2cc2e3a08
--- /dev/null
+++ b/region_relevance_network/inference.py
@@ -0,0 +1,41 @@
+import tensorflow as tf
+
+
+class RegionRelevanceInference():
+    def __init__(
+            self,
+            object_feat,
+            attribute_feat,
+            question_bins,
+            answers):
+
+        self.object_feat = object_feat
+        self.attribute_feat = attribute_feat
+        self.question_bins = question_bins
+        self.answers = answers
+
+        with tf.variable_scope('region_relevance_graph'):
+            # Compute dot product with question bins
+            self.question_object_scores = tf.matmul(
+                self.question_bins,
+                tf.transpose(self.object_feat))
+
+            self.question_attribute_scores = tf.matmul(
+                self.question_bins,
+                tf.transpose(self.attribute_feat))
+
+            # Compute dot product with answers
+            self.answer_object_scores = tf.matmul(
+                self.answers,
+                tf.transpose(self.object_feat))
+
+            self.answer_attribute_scores = tf.matmul(
+                self.answers,
+                tf.transpose(self.attribute_feat))
+
+            # Compute final scores
+            self.answer_region_scores = \
+                tf.square(self.question_object_scores) + \
+                tf.square(self.question_attribute_scores) + \
+                tf.square(self.answer_object_scores) + \
+                tf.square(self.answer_attribute_scores)
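Editor's usage sketch (not part of the diff): a minimal example of how the new data pipeline is meant to be driven, mirroring the __main__ block in data/vqa.py above. It assumes the modules in this change are importable and that constants.vocab_json already exists in constants.py (it is referenced here but not defined in this diff).

    import constants
    from data import vqa

    # Data manager introduced in data/vqa.py.
    data_mgr = vqa.data(
        constants.vqa_train_image_dir,
        constants.vqa_train_anno,
        constants.vocab_json,
        constants.vqa_answer_vocab_json,
        constants.image_size,
        constants.num_region_proposals,
        constants.num_negative_answers)

    # One sample = one question: cropped region images plus encoded
    # question bins, positive answer, and sampled negative answers.
    batch = data_mgr.get_parallel(0)
    print batch['region_images'].shape  # (num_region_proposals, h, w, 3)
    print batch['question'][0]          # dict: bin_0..bin_3 -> word ids
    print batch['positive_answer'][0]   # list of word ids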