Skip to content
Snippets Groups Projects
Commit d80c835a authored by tgupta6's avatar tgupta6
Browse files

Circular batch and train held out

parent 8922eafe
No related branches found
No related tags found
No related merge requests found
...@@ -38,23 +38,18 @@ def create_initializer(graph, sess, model): ...@@ -38,23 +38,18 @@ def create_initializer(graph, sess, model):
return initializer() return initializer()
def create_batch_generator(mode): def create_batch_generator(mode):
if mode=='val_subset': if mode=='val':
vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir
vqa_anno = constants.vqa_val_anno vqa_anno = constants.vqa_val_anno
num_questions = constants.num_val_subset_questions num_questions = constants.num_val_questions
offset = 0 offset = 0
elif mode=='val_rest': elif mode=='val_subset':
vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir
vqa_anno = constants.vqa_val_anno vqa_anno = constants.vqa_val_anno
num_questions = constants.num_val_rest_questions num_questions = constants.num_val_subset_questions
offset = constants.num_val_subset_questions
elif mode=='train':
vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir
vqa_anno = constants.vqa_train_anno
num_questions = constants.num_train_questions
offset = 0 offset = 0
else: else:
print "mode needs to be one of {'train','val_subset','val_rest'}, found " + mode print "mode needs to be one of {'val','val_subset'}, found " + mode
data_mgr = vqa_data.data( data_mgr = vqa_data.data(
vqa_resnet_feat_dir, vqa_resnet_feat_dir,
...@@ -143,6 +138,7 @@ class eval_mgr(): ...@@ -143,6 +138,7 @@ class eval_mgr():
self.correct = 0 self.correct = 0
self.total = 0 self.total = 0
self.results = [] self.results = []
self.seen_qids = set()
def eval(self, iter, eval_vars_dict, batch): def eval(self, iter, eval_vars_dict, batch):
batch_size = len(batch['question_unencoded']) batch_size = len(batch['question_unencoded'])
...@@ -173,7 +169,12 @@ class eval_mgr(): ...@@ -173,7 +169,12 @@ class eval_mgr():
'question_id': int(question_id), 'question_id': int(question_id),
'answer': pred_answer 'answer': pred_answer
} }
self.results.append(result_entry)
if question_id not in self.seen_qids:
self.seen_qids.add(question_id)
self.results.append(result_entry)
else:
print 'Already evaluated on this sample'
self.eval_data[str(question_id)] = dict_entry self.eval_data[str(question_id)] = dict_entry
......
...@@ -39,16 +39,13 @@ def create_initializer(graph, sess, model): ...@@ -39,16 +39,13 @@ def create_initializer(graph, sess, model):
return initializer() return initializer()
def create_batch_generator(mode): def create_batch_generator(mode):
if mode=='val': if mode=='train_subset':
vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir
vqa_anno = constants.vqa_val_anno
num_questions = constants.num_val_questions
elif mode=='train':
vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir
vqa_anno = constants.vqa_train_anno vqa_anno = constants.vqa_train_anno
num_questions = constants.num_train_questions num_questions = constants.num_train_held_out_questions
offset = constants.num_train_subset_questions
else: else:
print "mode needs to be one of {'train','test','val'}, found " + mode print "mode needs to be one of {'train_subset'}, found " + mode
data_mgr = vqa_data.data( data_mgr = vqa_data.data(
vqa_resnet_feat_dir, vqa_resnet_feat_dir,
...@@ -64,7 +61,7 @@ def create_batch_generator(mode): ...@@ -64,7 +61,7 @@ def create_batch_generator(mode):
constants.answer_batch_size, constants.answer_batch_size,
num_questions, num_questions,
1, 1,
0) offset)
batch_generator = tftools.data.async_batch_generator( batch_generator = tftools.data.async_batch_generator(
data_mgr, data_mgr,
...@@ -135,6 +132,7 @@ class eval_mgr(): ...@@ -135,6 +132,7 @@ class eval_mgr():
self.correct = 0 self.correct = 0
self.total = 0 self.total = 0
self.results = [] self.results = []
self.seen_qids = set()
def eval(self, iter, eval_vars_dict, batch): def eval(self, iter, eval_vars_dict, batch):
batch_size = len(batch['question_unencoded']) batch_size = len(batch['question_unencoded'])
...@@ -151,7 +149,12 @@ class eval_mgr(): ...@@ -151,7 +149,12 @@ class eval_mgr():
'question_id': int(question_id), 'question_id': int(question_id),
'answer': pred_answer 'answer': pred_answer
} }
self.results.append(result_entry)
if question_id not in self.seen_qids:
self.seen_qids.add(question_id)
self.results.append(result_entry)
else:
print 'Already evaluated on this sample'
self.total += batch_size self.total += batch_size
......
...@@ -475,7 +475,7 @@ def create_vqa_batch_generator(): ...@@ -475,7 +475,7 @@ def create_vqa_batch_generator():
index_generator = tftools.data.random( index_generator = tftools.data.random(
constants.answer_batch_size, constants.answer_batch_size,
constants.num_train_questions, constants.num_train_subset_questions,
constants.answer_num_epochs, constants.answer_num_epochs,
constants.answer_offset) constants.answer_offset)
......
...@@ -154,9 +154,10 @@ vqa_answer_vocab_json = os.path.join( ...@@ -154,9 +154,10 @@ vqa_answer_vocab_json = os.path.join(
# VQA dataset params
num_train_questions = 248349
# Held-out slice of the training set (~10% of train) used for evaluation.
num_train_held_out_questions = 24835
# Fix: original diff read `num_train_question` (missing 's'), which would
# raise NameError at import time.
num_train_subset_questions = num_train_questions - num_train_held_out_questions
num_val_questions = 121512
num_val_subset_questions = 10000
num_test_questions = 0
......
...@@ -6,21 +6,23 @@ import time ...@@ -6,21 +6,23 @@ import time
def sequential(batch_size, num_samples, num_epochs=1, offset=0):
    """Generate sequential index batches, padded circularly.

    Yields lists of `batch_size` indices in [offset, offset + num_samples).
    The index range is padded up to a multiple of `batch_size` by wrapping
    around to the start (modulo num_samples), so the final batch reuses
    leading samples instead of being dropped ("circular batch").

    Args:
        batch_size: number of indices per yielded batch.
        num_samples: number of distinct samples to cover per epoch.
        num_epochs: how many passes over the data to generate.
        offset: constant added to every index (start of the sample range).

    Yields:
        list of int: consecutive (wrapped) indices of length batch_size.
    """
    # Round num_samples up to a whole number of batches.
    num_samples_ = int(batch_size * np.ceil(num_samples / float(batch_size)))
    for epoch in range(num_epochs):
        # Modulo wraps the padded tail back to the first samples.
        indices = np.arange(num_samples_) % num_samples + offset
        indices = indices.tolist()
        for i in range(0, num_samples_ - batch_size + 1, batch_size):
            yield indices[i:i + batch_size]
def random(batch_size, num_samples, num_epochs, offset=0):
    """Generate randomly permuted index batches, padded circularly.

    Each epoch draws a fresh random permutation of the padded index range
    (num_samples rounded up to a multiple of batch_size); the padding wraps
    modulo num_samples so a handful of samples may appear twice per epoch,
    but no partial batch is ever dropped.

    Args:
        batch_size: number of indices per yielded batch.
        num_samples: number of distinct samples to cover per epoch.
        num_epochs: how many shuffled passes to generate.
        offset: constant added to every index (start of the sample range).

    Yields:
        list of int: shuffled (wrapped) indices of length batch_size.
    """
    # Round num_samples up to a whole number of batches.
    num_samples_ = int(batch_size * np.ceil(num_samples / float(batch_size)))
    for epoch in range(num_epochs):
        # np.random.seed(epoch)
        # Permute the padded range, then fold the padding back with modulo.
        indices = np.random.permutation(num_samples_) % num_samples + offset
        indices = indices.tolist()
        for i in range(0, num_samples_ - batch_size + 1, batch_size):
            yield indices[i:i + batch_size]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment