diff --git a/answer_classifier_cached_features/eval.py b/answer_classifier_cached_features/eval.py index 09d5e6da153a16570edafad3c50588318926cfcf..5e32665510c90c068a04f7c12f5a96190415a095 100644 --- a/answer_classifier_cached_features/eval.py +++ b/answer_classifier_cached_features/eval.py @@ -38,23 +38,18 @@ def create_initializer(graph, sess, model): return initializer() def create_batch_generator(mode): - if mode=='val_subset': + if mode=='val': vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir vqa_anno = constants.vqa_val_anno - num_questions = constants.num_val_subset_questions + num_questions = constants.num_val_questions offset = 0 - elif mode=='val_rest': + elif mode=='val_subset': vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir vqa_anno = constants.vqa_val_anno - num_questions = constants.num_val_rest_questions - offset = constants.num_val_subset_questions - elif mode=='train': - vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir - vqa_anno = constants.vqa_train_anno - num_questions = constants.num_train_questions + num_questions = constants.num_val_subset_questions offset = 0 else: - print "mode needs to be one of {'train','val_subset','val_rest'}, found " + mode + print "mode needs to be one of {'val','val_subset'}, found " + mode data_mgr = vqa_data.data( vqa_resnet_feat_dir, @@ -143,6 +138,7 @@ class eval_mgr(): self.correct = 0 self.total = 0 self.results = [] + self.seen_qids = set() def eval(self, iter, eval_vars_dict, batch): batch_size = len(batch['question_unencoded']) @@ -173,7 +169,12 @@ class eval_mgr(): 'question_id': int(question_id), 'answer': pred_answer } - self.results.append(result_entry) + + if question_id not in self.seen_qids: + self.seen_qids.add(question_id) + self.results.append(result_entry) + else: + print 'Already evaluated on this sample' self.eval_data[str(question_id)] = dict_entry diff --git a/answer_classifier_cached_features/select_best_model.py b/answer_classifier_cached_features/select_best_model.py 
index ee6b2706151d02216b663c8368504dcebd641c72..f483337276dc87b39f9cea1e64eb7c605306f5e8 100644 --- a/answer_classifier_cached_features/select_best_model.py +++ b/answer_classifier_cached_features/select_best_model.py @@ -39,16 +39,13 @@ def create_initializer(graph, sess, model): return initializer() def create_batch_generator(mode): - if mode=='val': - vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir - vqa_anno = constants.vqa_val_anno - num_questions = constants.num_val_questions - elif mode=='train': + if mode=='train_subset': vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir vqa_anno = constants.vqa_train_anno - num_questions = constants.num_train_questions + num_questions = constants.num_train_held_out_questions + offset = constants.num_train_subset_questions else: - print "mode needs to be one of {'train','test','val'}, found " + mode + print "mode needs to be one of {'train_subset'}, found " + mode data_mgr = vqa_data.data( vqa_resnet_feat_dir, @@ -64,7 +61,7 @@ def create_batch_generator(mode): constants.answer_batch_size, num_questions, 1, - 0) + offset) batch_generator = tftools.data.async_batch_generator( data_mgr, @@ -135,6 +132,7 @@ class eval_mgr(): self.correct = 0 self.total = 0 self.results = [] + self.seen_qids = set() def eval(self, iter, eval_vars_dict, batch): batch_size = len(batch['question_unencoded']) @@ -151,7 +149,12 @@ class eval_mgr(): 'question_id': int(question_id), 'answer': pred_answer } - self.results.append(result_entry) + + if question_id not in self.seen_qids: + self.seen_qids.add(question_id) + self.results.append(result_entry) + else: + print 'Already evaluated on this sample' self.total += batch_size diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py index 608516a0362715a7956ca627785c44cc3efc2b23..63dfdf3f7885c3d9a23bbcc1706382ead119881b 100644 --- a/answer_classifier_cached_features/train.py +++ b/answer_classifier_cached_features/train.py @@ -475,7 +475,7 @@ 
def create_vqa_batch_generator(): index_generator = tftools.data.random( constants.answer_batch_size, - constants.num_train_questions, + constants.num_train_subset_questions, constants.answer_num_epochs, constants.answer_offset) diff --git a/constants_crunchy.py b/constants_crunchy.py index ac7d315ae3030811ee81a5e00bc33c3c37acb431..df1928105a59704db182bbd47c43284f5c25c3f9 100644 --- a/constants_crunchy.py +++ b/constants_crunchy.py @@ -154,9 +154,10 @@ vqa_answer_vocab_json = os.path.join( # VQA dataset params num_train_questions = 248349 -num_val_subset_questions = 10000 +num_train_held_out_questions = 24835 +num_train_subset_questions = num_train_questions - num_train_held_out_questions num_val_questions = 121512 -num_val_rest_questions = num_val_questions - num_val_subset_questions +num_val_subset_questions = 10000 num_test_questions = 0 diff --git a/tftools/data.py b/tftools/data.py index 1a60f44c3a2cce38084221a89e4e0b432da3f3ab..a872a0653cb4a9dffe1bc5de9b44cd26868d80cd 100644 --- a/tftools/data.py +++ b/tftools/data.py @@ -6,21 +6,23 @@ import time def sequential(batch_size, num_samples, num_epochs=1, offset=0): """Generate sequence indices. """ + num_samples_ = int(batch_size*np.ceil(num_samples/float(batch_size))) for epoch in range(num_epochs): - indices = np.arange(num_samples) + offset + indices = np.arange(num_samples_)%num_samples + offset indices = indices.tolist() - for i in range(0, num_samples - batch_size + 1, batch_size): + for i in range(0, num_samples_ - batch_size + 1, batch_size): yield indices[i:i+batch_size] def random(batch_size, num_samples, num_epochs, offset=0): """Generate random indices.
""" + num_samples_ = int(batch_size*np.ceil(num_samples/float(batch_size))) for epoch in range(num_epochs): # np.random.seed(epoch) - indices = np.random.permutation(num_samples) + offset + indices = np.random.permutation(num_samples_)%num_samples + offset indices = indices.tolist() - for i in range(0, num_samples - batch_size + 1, batch_size): + for i in range(0, num_samples_ - batch_size + 1, batch_size): yield indices[i:i+batch_size]