diff --git a/answer_classifier_cached_features/eval.py b/answer_classifier_cached_features/eval.py
index 09d5e6da153a16570edafad3c50588318926cfcf..5e32665510c90c068a04f7c12f5a96190415a095 100644
--- a/answer_classifier_cached_features/eval.py
+++ b/answer_classifier_cached_features/eval.py
@@ -38,23 +38,18 @@ def create_initializer(graph, sess, model):
     return initializer()
 
 def create_batch_generator(mode):
-    if mode=='val_subset':
+    if mode=='val':
         vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir
         vqa_anno = constants.vqa_val_anno
-        num_questions = constants.num_val_subset_questions
+        num_questions = constants.num_val_questions
         offset = 0
-    elif mode=='val_rest':
+    elif mode=='val_subset':
         vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir
         vqa_anno = constants.vqa_val_anno
-        num_questions = constants.num_val_rest_questions
-        offset = constants.num_val_subset_questions
-    elif mode=='train':
-        vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir
-        vqa_anno = constants.vqa_train_anno
-        num_questions = constants.num_train_questions
+        num_questions = constants.num_val_subset_questions
         offset = 0
     else:
-        print "mode needs to be one of {'train','val_subset','val_rest'}, found " + mode
+        print "mode needs to be one of {'val','val_subset'}, found " + mode
     
     data_mgr = vqa_data.data(
         vqa_resnet_feat_dir,
@@ -143,6 +138,7 @@ class eval_mgr():
         self.correct = 0
         self.total = 0
         self.results = []
+        self.seen_qids = set()
 
     def eval(self, iter, eval_vars_dict, batch):
         batch_size = len(batch['question_unencoded'])
@@ -173,7 +169,12 @@ class eval_mgr():
                 'question_id': int(question_id),
                 'answer': pred_answer
             }
-            self.results.append(result_entry)
+            
+            if question_id not in self.seen_qids:
+                self.seen_qids.add(question_id)
+                self.results.append(result_entry)
+            else:
+                print 'Already evaluated on this sample'
             
             self.eval_data[str(question_id)] = dict_entry
 
diff --git a/answer_classifier_cached_features/select_best_model.py b/answer_classifier_cached_features/select_best_model.py
index ee6b2706151d02216b663c8368504dcebd641c72..f483337276dc87b39f9cea1e64eb7c605306f5e8 100644
--- a/answer_classifier_cached_features/select_best_model.py
+++ b/answer_classifier_cached_features/select_best_model.py
@@ -39,16 +39,13 @@ def create_initializer(graph, sess, model):
     return initializer()
 
 def create_batch_generator(mode):
-    if mode=='val':
-        vqa_resnet_feat_dir = constants.vqa_val_resnet_feat_dir
-        vqa_anno = constants.vqa_val_anno
-        num_questions = constants.num_val_questions
-    elif mode=='train':
+    if mode=='train_subset':
         vqa_resnet_feat_dir = constants.vqa_train_resnet_feat_dir
         vqa_anno = constants.vqa_train_anno
-        num_questions = constants.num_train_questions
+        num_questions = constants.num_train_held_out_questions
+        offset = constants.num_train_subset_questions
     else:
-        print "mode needs to be one of {'train','test','val'}, found " + mode
+        print "mode needs to be one of {'train_subset'}, found " + mode
     
     data_mgr = vqa_data.data(
         vqa_resnet_feat_dir,
@@ -64,7 +61,7 @@ def create_batch_generator(mode):
         constants.answer_batch_size, 
         num_questions, 
         1, 
-        0)
+        offset)
     
     batch_generator = tftools.data.async_batch_generator(
         data_mgr, 
@@ -135,6 +132,7 @@ class eval_mgr():
         self.correct = 0
         self.total = 0
         self.results = []
+        self.seen_qids = set()
 
     def eval(self, iter, eval_vars_dict, batch):
         batch_size = len(batch['question_unencoded'])
@@ -151,7 +149,12 @@ class eval_mgr():
                 'question_id': int(question_id),
                 'answer': pred_answer
             }
-            self.results.append(result_entry)
+
+            if question_id not in self.seen_qids:
+                self.seen_qids.add(question_id)
+                self.results.append(result_entry)
+            else:
+                print 'Already evaluated on this sample'
             
 
         self.total += batch_size
diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py
index 608516a0362715a7956ca627785c44cc3efc2b23..63dfdf3f7885c3d9a23bbcc1706382ead119881b 100644
--- a/answer_classifier_cached_features/train.py
+++ b/answer_classifier_cached_features/train.py
@@ -475,7 +475,7 @@ def create_vqa_batch_generator():
 
     index_generator = tftools.data.random(
         constants.answer_batch_size, 
-        constants.num_train_questions, 
+        constants.num_train_subset_questions, 
         constants.answer_num_epochs, 
         constants.answer_offset)
     
diff --git a/constants_crunchy.py b/constants_crunchy.py
index ac7d315ae3030811ee81a5e00bc33c3c37acb431..df1928105a59704db182bbd47c43284f5c25c3f9 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -154,9 +154,10 @@ vqa_answer_vocab_json = os.path.join(
 
 # VQA dataset params
 num_train_questions = 248349
-num_val_subset_questions = 10000
+num_train_held_out_questions = 24835
+num_train_subset_questions = num_train_questions - num_train_held_out_questions
 num_val_questions = 121512
-num_val_rest_questions = num_val_questions - num_val_subset_questions
+num_val_subset_questions = 10000
 
 num_test_questions = 0
 
diff --git a/tftools/data.py b/tftools/data.py
index 1a60f44c3a2cce38084221a89e4e0b432da3f3ab..a872a0653cb4a9dffe1bc5de9b44cd26868d80cd 100644
--- a/tftools/data.py
+++ b/tftools/data.py
@@ -6,21 +6,23 @@ import time
 def sequential(batch_size, num_samples, num_epochs=1, offset=0):
     """Generate sequence indices.
     """
+    num_samples_ = int(batch_size*np.ceil(num_samples/float(batch_size)))
     for epoch in range(num_epochs):
-        indices = np.arange(num_samples) + offset
+        indices = np.arange(num_samples_)%num_samples + offset
         indices = indices.tolist()
-        for i in range(0, num_samples - batch_size + 1, batch_size):
+        for i in range(0, num_samples_ - batch_size + 1, batch_size):
             yield indices[i:i+batch_size]
 
 
 def random(batch_size, num_samples, num_epochs, offset=0):
     """Generate random indices.
     """
+    num_samples_ = int(batch_size*np.ceil(num_samples/float(batch_size)))
     for epoch in range(num_epochs):
 #        np.random.seed(epoch)
-        indices = np.random.permutation(num_samples) + offset
+        indices = np.random.permutation(num_samples_)%num_samples + offset
         indices = indices.tolist()
-        for i in range(0, num_samples - batch_size + 1, batch_size):
+        for i in range(0, num_samples_ - batch_size + 1, batch_size):
             yield indices[i:i+batch_size]