Commit 271e7575 authored by tgupta6

adding code for answer classifier and region ranker

parent 6e521fb3
Showing changed files with 491 additions and 16 deletions
File added
import json
import sys
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import tensorflow as tf
from scipy import misc
from collections import namedtuple
import region_ranker.perfect_ranker as region_proposer
qa_tuple = namedtuple('qa_tuple','image_id question answer')
def create_ans_dict():
ans_dict = {
'yes' : 0,
'no' : 1,
'red' : 2,
'green' : 3,
'blue' : 4,
'circle' : 5,
'rectangle': 6,
'triangle' : 7,
}
for i in range(0,10):
ans_dict[str(i)] = 8+i
inv_ans_dict = {v: k for k, v in ans_dict.items()}
return ans_dict, inv_ans_dict
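# Illustrative sanity check of the mapping above (not part of the module):
# the eight named answers occupy indices 0-7 and the digits '0'-'9' occupy
# 8-17, so the answer space has 18 classes in total.
def _check_ans_dict():
    ans_dict, inv_ans_dict = create_ans_dict()
    assert len(ans_dict) == 18       # 8 named answers + 10 digits
    assert ans_dict['9'] == 17       # digits are offset by 8
    assert inv_ans_dict[0] == 'yes'  # inverse dict recovers the answer string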
def parse_qa_anno(json_filename):
with open(json_filename,'r') as json_file:
raw_data = json.load(json_file)
qa_dict = dict()
for entry in raw_data:
qa_dict[entry['question_id']] = qa_tuple(image_id = entry['image_id'],
question = entry['question'],
answer = entry['answer'])
return qa_dict
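# For reference, parse_qa_anno only reads these four fields, so a minimal
# record in the JSON file looks like this (illustrative values, not taken
# from the dataset):
#
#   {"question_id": 1, "image_id": 1,
#    "question": "Is there a blue triangle?", "answer": "yes"}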
def get_vocab(qa_dict):
vocab = dict()
    count = 0
for key, value in qa_dict.items():
for word in value.question[0:-1].split():
if word.lower() not in vocab:
vocab[word.lower()] = count
count = count + 1
if value.answer.lower() not in vocab:
vocab[value.answer.lower()] = count
count = count + 1
vocab['unk'] = count
inv_vocab = {v: k for k, v in vocab.items()}
return vocab, inv_vocab
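# A minimal sketch of the bag-of-words row that ans_mini_batch_loader below
# builds from this vocabulary: get_vocab lowercases tokens, question[0:-1]
# drops the trailing '?', and out-of-vocabulary words fall back to 'unk'.
# encode_question_bow is a hypothetical helper mirroring that loop.
def encode_question_bow(question, vocab):
    encoding = np.zeros(len(vocab))
    for word in question[0:-1].split():
        encoding[vocab.get(word.lower(), vocab['unk'])] += 1  # word counts
    return encoding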
def ans_mini_batch_loader(qa_dict, region_anno_dict, ans_dict, vocab,
image_dir, mean_image, start_index, batch_size,
img_height=100, img_width=100, channels = 3):
# compute the number of proposals
    count = 0
for i in xrange(start_index, start_index + batch_size):
count = count + len(region_anno_dict[qa_dict[i].image_id])
region_images = np.empty(shape=[count, img_height,
img_width, channels])
ans_labels = np.zeros(shape=[count, len(ans_dict)])
question_encodings = np.zeros(shape=[count, len(vocab)])
counter = 0
for i in xrange(start_index, start_index + batch_size):
image_id = qa_dict[i].image_id
question = qa_dict[i].question
answer = qa_dict[i].answer
region_coords = region_anno_dict[image_id]
image = mpimg.imread(os.path.join(image_dir, str(image_id) + '.jpg'))
regions = region_proposer.rank_regions(image, question, region_coords)
for _, proposal in regions.items():
resized_region = misc.imresize(proposal.image, \
(img_height, img_width))
region_images[counter,:,:,:] = (resized_region / 254.0) - mean_image
ans_labels[counter, ans_dict[answer]] = 1
            for word in question[0:-1].split():
                word = word.lower()  # vocab keys are lowercased in get_vocab
                if word not in vocab:
                    word = 'unk'
                question_encodings[counter, vocab[word]] += 1
counter = counter + 1
return region_images, ans_labels, question_encodings
if __name__=='__main__':
train_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/train_anno.json'
region_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/regions_anno.json'
image_dir = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/images'
qa_anno_dict = parse_qa_anno(train_anno_filename)
region_anno_dict = region_proposer.parse_region_anno(region_anno_filename)
ans_dict, _ = create_ans_dict()
vocab, _ = get_vocab(qa_anno_dict)
    # Smoke test: use a zero mean image (the loader subtracts mean_image from
    # every region, so passing None would raise a TypeError).
    region_images, ans_labels, question_encodings = \
        ans_mini_batch_loader(qa_anno_dict, region_anno_dict, ans_dict, vocab,
                              image_dir, np.zeros((25, 25, 3)), 1, 2, 25, 25, 3)
print(ans_labels.shape)
print(question_encodings.shape)
print(region_images.shape)
File added
import sys
import os
import json
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import tensorflow as tf
import object_classifiers.obj_data_io_helper as obj_data_loader
import attribute_classifiers.atr_data_io_helper as atr_data_loader
import tf_graph_creation_helper as graph_creator
import plot_helper as plotter
import ans_data_io_helper as ans_io_helper
import region_ranker.perfect_ranker as region_proposer
def train(train_params):
sess = tf.InteractiveSession()
train_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/train_anno.json'
regions_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/regions_anno.json'
image_dir = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/images'
outdir = '/home/tanmay/Code/GenVQA/Exp_Results/Ans_Classifier'
if not os.path.exists(outdir):
os.mkdir(outdir)
qa_anno_dict = ans_io_helper.parse_qa_anno(train_anno_filename)
region_anno_dict = region_proposer.parse_region_anno(regions_anno_filename)
ans_vocab, inv_ans_vocab = ans_io_helper.create_ans_dict()
vocab, inv_vocab = ans_io_helper.get_vocab(qa_anno_dict)
# Create graph
image_regions, questions, keep_prob, y = \
graph_creator.placeholder_inputs_ans(len(vocab), len(ans_vocab),
mode='gt')
y_pred_obj = graph_creator.obj_comp_graph(image_regions, keep_prob)
obj_feat = tf.get_collection('obj_feat', scope='obj/conv2')
y_pred_atr = graph_creator.atr_comp_graph(image_regions, keep_prob, obj_feat[0])
atr_feat = tf.get_collection('atr_feat', scope='atr/conv2')
# model restoration
obj_atr_saver = tf.train.Saver()
model_to_restore = '/home/tanmay/Code/GenVQA/GenVQA/classifiers/' + \
'saved_models/obj_atr_classifier-1'
obj_atr_saver.restore(sess, model_to_restore)
y_pred = graph_creator.ans_comp_graph(image_regions, questions, keep_prob, \
obj_feat[0], atr_feat[0],
vocab, inv_vocab, len(ans_vocab))
cross_entropy = graph_creator.loss(y, y_pred)
accuracy = graph_creator.evaluation(y, y_pred)
# Collect variables
vars_to_opt = tf.get_collection(tf.GraphKeys.VARIABLES, scope='ans')
train_step = tf.train.AdamOptimizer(train_params['adam_lr']) \
.minimize(cross_entropy, var_list=vars_to_opt)
    # Restore variables from the 'obj' and 'atr' scopes; initialize the rest.
    # (Concatenating the collections keeps vars_to_restore a flat list so the
    # membership test below actually matches individual variables.)
    vars_to_restore = tf.get_collection(tf.GraphKeys.VARIABLES, scope='obj') + \
                      tf.get_collection(tf.GraphKeys.VARIABLES, scope='atr')
    all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
    vars_to_init = [var for var in all_vars if var not in vars_to_restore]
# Session saver
saver = tf.train.Saver()
# Initializing all variables except those restored
print('Initializing variables')
sess.run(tf.initialize_variables(vars_to_init))
# Load mean image
mean_image = np.load('/home/tanmay/Code/GenVQA/Exp_Results/' + \
'Obj_Classifier/mean_image.npy')
# Val data
val_region_images, val_ans_labels, val_questions = \
ans_io_helper.ans_mini_batch_loader(qa_anno_dict, region_anno_dict,
ans_vocab, vocab, image_dir,
mean_image, 9501, 499,
25, 25, 3)
feed_dict_val = {
image_regions : val_region_images,
questions: val_questions,
keep_prob: 1.0,
y: val_ans_labels,
}
# Start Training
batch_size = 10
max_epoch = 10
max_iter = 950
val_acc_array_epoch = np.zeros([max_epoch])
train_acc_array_epoch = np.zeros([max_epoch])
for epoch in range(max_epoch):
for i in range(max_iter):
if i%100==0:
print('Iter: ' + str(i))
print('Val Acc: ' + str(accuracy.eval(feed_dict_val)))
train_region_images, train_ans_labels, train_questions = \
ans_io_helper.ans_mini_batch_loader(qa_anno_dict, region_anno_dict,
ans_vocab, vocab, image_dir,
mean_image, 1+i*batch_size,
batch_size, 25, 25, 3)
feed_dict_train = {
image_regions : train_region_images,
questions: train_questions,
keep_prob: 1.0,
y: train_ans_labels,
}
_, current_train_batch_acc = sess.run([train_step, accuracy],
feed_dict=feed_dict_train)
train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] + \
current_train_batch_acc
train_acc_array_epoch[epoch] = train_acc_array_epoch[epoch] / max_iter
val_acc_array_epoch[epoch] = accuracy.eval(feed_dict_val)
plotter.plot_accuracies(xdata=np.arange(0, epoch + 1) + 1,
ydata_train=train_acc_array_epoch[0:epoch + 1],
ydata_val=val_acc_array_epoch[0:epoch + 1],
xlim=[1, max_epoch], ylim=[0, 1.0],
savePath=os.path.join(outdir,
'acc_vs_epoch.pdf'))
save_path = saver.save(sess, os.path.join(outdir, 'ans_classifier'),
global_step=epoch)
sess.close()
tf.reset_default_graph()
if __name__=='__main__':
train_params = {
'adam_lr' : 0.001,
}
train(train_params)
import json
import sys
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import tensorflow as tf
from scipy import misc
def obj_mini_batch_loader(json_data, image_dir, mean_image, start_index, batch_size, img_height = 100, img_width = 100, channels = 3):
obj_images = np.empty(shape=[9 * batch_size, img_height / 3, img_width / 3, channels])
obj_labels = np.zeros(shape=[9 * batch_size, 4])
for i in range(start_index, start_index + batch_size):
image_name = os.path.join(image_dir, str(i) + '.jpg')
image = misc.imresize(mpimg.imread(image_name), (img_height, img_width), interp='nearest')
crop_shape = np.array([image.shape[0], image.shape[1]]) / 3
grid_config = json_data[i]
counter = 0
for grid_row in range(0, 3):
for grid_col in range(0, 3):
start_row = grid_row * crop_shape[0]
start_col = grid_col * crop_shape[1]
cropped_image = image[start_row:start_row + crop_shape[0], start_col:start_col + crop_shape[1], :]
if np.ndim(mean_image) == 0:
obj_images[9 * (i - start_index) + counter, :, :, :] = cropped_image / 254.0
else:
obj_images[9 * (i - start_index) + counter, :, :, :] = (cropped_image / 254.0) - mean_image
obj_labels[9 * (i - start_index) + counter, grid_config[6 * grid_row + 2 * grid_col]] = 1
counter = counter + 1
return (obj_images, obj_labels)
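# The lookup grid_config[6*grid_row + 2*grid_col] above appears to assume a
# flat, row-major list with two entries per grid cell; a hypothetical helper
# that makes this reading explicit (an interpretation of the indexing, not
# confirmed by the dataset):
def cell_object_label(grid_config, grid_row, grid_col):
    # Two entries per cell and 3 cells (6 entries) per row; the first entry
    # of each pair is taken as the object label.
    return grid_config[6 * grid_row + 2 * grid_col]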
def mean_image_batch(json_data, image_dir, start_index, batch_size, img_height = 100, img_width = 100, channels = 3):
batch = obj_mini_batch_loader(json_data, image_dir, np.empty([]), start_index, batch_size, img_height, img_width, channels)
mean_image = np.mean(batch[0], 0)
return mean_image
def mean_image(json_data, image_dir, num_images, batch_size, img_height = 100, img_width = 100, channels = 3):
max_iter = np.floor(num_images / batch_size)
mean_image = np.zeros([img_height / 3, img_width / 3, channels])
for i in range(max_iter.astype(np.int16)):
mean_image = mean_image + mean_image_batch(json_data, image_dir, 1 + i * batch_size, batch_size, img_height, img_width, channels)
mean_image = mean_image / max_iter
return mean_image
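# Caveat: mean_image averages per-batch means, which equals the true pixel
# mean only because every batch has the same size; images past the last full
# batch are dropped by the floor. A tiny NumPy check of that property:
def _check_mean_of_batch_means():
    vals = np.arange(12.0).reshape(3, 4)  # 3 full batches of 4 values
    assert np.mean([b.mean() for b in vals]) == vals.mean()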
class html_obj_table_writer:
def __init__(self, filename):
self.filename = filename
self.html_file = open(self.filename, 'w')
self.html_file.write('<!DOCTYPE html>\n<html>\n<body>\n<table border="1" style="width:100%"> \n')
def add_element(self, col_dict):
self.html_file.write(' <tr>\n')
for key in range(len(col_dict)):
self.html_file.write(' <td>{}</td>\n'.format(col_dict[key]))
self.html_file.write(' </tr>\n')
def image_tag(self, image_path, height, width):
return '<img src="{}" alt="IMAGE NOT FOUND!" height={} width={}>'.format(image_path, height, width)
def close_file(self):
self.html_file.write('</table>\n</body>\n</html>')
self.html_file.close()
if __name__ == '__main__':
html_writer = html_obj_table_writer('/home/tanmay/Code/GenVQA/Exp_Results/Shape_Classifier_v_1/trial.html')
col_dict = {0: 'sam',
1: html_writer.image_tag('something.png', 25, 25)}
html_writer.add_element(col_dict)
html_writer.close_file()
tanmay@crunchy.15752:1450461082
\ No newline at end of file
No preview for this file type
File added
import json
import os
import numpy as np
from collections import namedtuple
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy import misc
region = namedtuple('region','image score coord')
def parse_region_anno(json_filename):
with open(json_filename,'r') as json_file:
raw_data = json.load(json_file)
region_anno_dict = dict()
for entry in raw_data:
region_anno_dict[entry['image_id']] = entry['regions']
return region_anno_dict
def rank_regions(image, question, region_coords):
regions = dict()
    count = 1
for key in region_coords:
x1, y1, x2, y2 = region_coords[key]
cropped_image = image[y1-1:y2, x1-1:x2, :]
if key in question:
score = 1
else:
score = 0
regions[count] = region(image=cropped_image, score=score,
coord=region_coords[key])
count = count + 1
return regions
if __name__=='__main__':
image_dir = '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images/'
json_filename = os.path.join('/home/tanmay/Code/GenVQA/GenVQA/',
'shapes_dataset/regions_anno.json')
region_anno_dict = parse_region_anno(json_filename)
image_id = 1
question = 'Is there a blue triangle?'
region_coords = region_anno_dict[image_id]
image = mpimg.imread(os.path.join(image_dir, str(image_id) + '.jpg'))
regions = rank_regions(image, question, region_coords)
print(regions)
File added
File added
import numpy as np
import tensorflow as tf
import answer_classifier.ans_data_io_helper as ans_io_helper
def weight_variable(shape, var_name = 'W'):
initial = tf.truncated_normal(shape, stddev=0.1)
@@ -30,6 +31,18 @@ def placeholder_inputs(mode = 'gt'):
print 'No placeholder for ground truth'
return (x, keep_prob)
def placeholder_inputs_ans(total_vocab_size, ans_vocab_size, mode='gt'):
image_regions = tf.placeholder(tf.float32, shape=[None,25,25,3])
keep_prob = tf.placeholder(tf.float32)
questions = tf.placeholder(tf.float32, shape=[None,total_vocab_size])
if mode == 'gt':
print 'Creating placeholder for ground truth'
gt_answer = tf.placeholder(tf.float32, shape=[None, ans_vocab_size])
return (image_regions, questions, keep_prob, gt_answer)
if mode == 'no_gt':
print 'No placeholder for ground truth'
return (image_regions, questions, keep_prob)
def obj_comp_graph(x, keep_prob):
with tf.name_scope('obj') as obj_graph:
@@ -77,6 +90,50 @@ def atr_comp_graph(x, keep_prob, obj_feat):
tf.add_to_collection('atr_feat', h_pool2_drop_flat)
return y_pred
def ans_comp_graph(image_regions, questions, keep_prob, \
obj_feat, atr_feat, vocab, inv_vocab, ans_vocab_size):
with tf.name_scope('ans') as ans_graph:
with tf.name_scope('word_embed') as word_embed:
initial = tf.random_uniform(shape=[len(vocab),100], minval=0, maxval=1)
word_vecs = tf.Variable(initial, name='word_vecs')
with tf.name_scope('q_embed') as q_embed:
q_feat = tf.matmul(questions, word_vecs)
num_words = tf.reduce_sum(questions, 1, keep_dims=True)
q_feat = tf.truediv(q_feat, num_words)
with tf.name_scope('conv1') as conv1:
W_conv1 = weight_variable([5,5,3,4])
b_conv1 = bias_variable([4])
h_conv1 = tf.nn.relu(conv2d(image_regions, W_conv1) + b_conv1, name='h')
h_pool1 = max_pool_2x2(h_conv1)
h_conv1_drop = tf.nn.dropout(h_pool1, keep_prob, name='h_pool_drop')
with tf.name_scope('conv2') as conv2:
W_conv2 = weight_variable([3,3,4,8])
b_conv2 = bias_variable([8])
            h_conv2 = tf.nn.relu(conv2d(h_conv1_drop, W_conv2) + b_conv2, name='h')
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob, name='h_pool_drop')
h_pool2_drop_flat = tf.reshape(h_pool2_drop, [-1, 392], name='h_pool_drop_flat')
with tf.name_scope('fc1') as fc1:
W_region_fc1 = weight_variable([392, ans_vocab_size], var_name='W_region')
W_obj_fc1 = weight_variable([392, ans_vocab_size], var_name='W_obj')
W_atr_fc1 = weight_variable([392, ans_vocab_size], var_name='W_atr')
W_q_fc1 = weight_variable([100, ans_vocab_size], var_name='W_q')
b_fc1 = bias_variable([ans_vocab_size])
y_pred = tf.nn.softmax(tf.matmul(h_pool2_drop_flat, W_region_fc1) + \
tf.matmul(obj_feat, W_obj_fc1) + \
tf.matmul(atr_feat, W_atr_fc1) + \
tf.matmul(q_feat, W_q_fc1) + b_fc1)
tf.add_to_collection('region_feat', h_pool2_drop_flat)
tf.add_to_collection('q_feat', q_feat)
return y_pred
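# The q_embed block above averages word embeddings: the bag-of-words row
# times word_vecs sums the embeddings of the question's words (repeats
# weighted by their counts), and dividing by the word count takes the mean.
# A NumPy sketch of the same arithmetic with toy sizes (illustrative only):
def _q_embed_sketch():
    bow = np.array([[1., 0., 2., 0., 1.]])  # counts over a 5-word vocab
    word_vecs = np.random.rand(5, 100)      # |vocab| x 100 embedding table
    return bow.dot(word_vecs) / bow.sum(axis=1, keepdims=True)  # (1, 100) mean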
def evaluation(y, y_pred):
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1), name='correct_prediction')
@@ -91,19 +148,31 @@ def loss(y, y_pred):
if __name__ == '__main__':
lg_dir = '/home/tanmay/Code/GenVQA/Exp_Results/lg_files/'
ans_vocab, _ = ans_io_helper.create_ans_dict()
train_anno_filename = '/home/tanmay/Code/GenVQA/GenVQA/' + \
'shapes_dataset/train_anno.json'
qa_dict = ans_io_helper.parse_qa_anno(train_anno_filename)
vocab, inv_vocab = ans_io_helper.get_vocab(qa_dict)
g = tf.Graph()
with g.as_default():
-        x, y, keep_prob = placeholder_inputs(mode='gt')
-        y_pred = obj_comp_graph(x, keep_prob)
+        image_regions, questions, keep_prob = \
+            placeholder_inputs_ans(len(vocab), len(ans_vocab), mode='no_gt')
+        y_pred = obj_comp_graph(image_regions, keep_prob)
        obj_feat = tf.get_collection('obj_feat', scope='obj/conv2')
-        y_pred2 = atr_comp_graph(x, keep_prob, obj_feat[0])
-        accuracy = evaluation(y, y_pred2)
-        accuracy_summary = tf.scalar_summary('accuracy', accuracy)
+        y_pred2 = atr_comp_graph(image_regions, keep_prob, obj_feat[0])
+        atr_feat = tf.get_collection('atr_feat', scope='atr/conv2')
+        y_pred3 = ans_comp_graph(image_regions, questions, keep_prob, \
+                                 obj_feat[0], atr_feat[0],
+                                 vocab, inv_vocab, len(ans_vocab))
+        # accuracy = evaluation(y, y_pred2)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
merged = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(lg_dir, graph_def=g.as_graph_def())
-        result = sess.run([merged, y_pred], feed_dict={x: np.random.rand(10, 25, 25, 3),
-                                                       y: np.random.rand(10, 4),
-                                                       keep_prob: 1.0})
-        summary_writer.add_summary(result[0], 1)
+        # result = sess.run([merged, y_pred], feed_dict={x: np.random.rand(10, 25, 25, 3),
+        #                                                y: np.random.rand(10, 4),
+        #                                                keep_prob: 1.0})
No preview for this file type
@@ -11,10 +11,10 @@ import attribute_classifiers.train_atr_classifier as atr_trainer
import attribute_classifiers.eval_atr_classifier as atr_evaluator
workflow = {
-    'train_obj': True,
-    'eval_obj': True,
-    'train_atr': True,
-    'eval_atr': True,
+    'train_obj': False,
+    'eval_obj': False,
+    'train_atr': False,
+    'eval_atr': False,
}
obj_classifier_train_params = {
@@ -28,7 +28,7 @@ obj_classifier_train_params = {
obj_classifier_eval_params = {
'out_dir': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier',
-    'model_name': '/home/tanmay/Code/GenVQA/Exp_Results/Obj_Classifier/obj_classifier',
+    'model_name': '/home/tanmay/Code/GenVQA/Exp_Results/Atr_Classifier/obj_atr_classifier',
'global_step': 1,
'test_json': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/test_anno.json',
'image_dir': '/home/tanmay/Code/GenVQA/GenVQA/shapes_dataset/images',