From d3b9aaf6047587db775eff811e45c555382a032a Mon Sep 17 00:00:00 2001
From: tgupta6 <tgupta6@illinois.edu>
Date: Thu, 25 Aug 2016 23:16:02 -0500
Subject: [PATCH] vqa evaluation code and answer classifier scripts changed to
 using qids

---
 .../fine_grained_eval.py                      |  10 ++
 constants_crunchy.py                          |  23 ++-
 .../eval.py                                   |   3 +-
 tftools/train.py                              | 144 ++++++++++++++++++
 vqa_parser.py                                 |  84 +++++++---
 5 files changed, 238 insertions(+), 26 deletions(-)
 create mode 100644 answer_classifier_cached_features/fine_grained_eval.py
 create mode 100644 tftools/train.py

diff --git a/answer_classifier_cached_features/fine_grained_eval.py b/answer_classifier_cached_features/fine_grained_eval.py
new file mode 100644
index 0000000..32b2a6f
--- /dev/null
+++ b/answer_classifier_cached_features/fine_grained_eval.py
@@ -0,0 +1,10 @@
+from vqa_eval import analyzer
+import constants
+
+if __name__=='__main__':
+    analyzer.analyze(
+        constants.raw_vqa_val_anno_json,
+        constants.raw_vqa_val_ques_json,
+        constants.answer_eval_results_json,
+        constants.vqa_results_dir)
+
diff --git a/constants_crunchy.py b/constants_crunchy.py
index c39d9f3..7391777 100644
--- a/constants_crunchy.py
+++ b/constants_crunchy.py
@@ -111,7 +111,7 @@ region_fine_tune_from_iter = 3000
 region_fine_tune_from = region_model + '-' + str(region_fine_tune_from_iter)
 
 # Object Attribute Classifier Evaluation Params
-region_eval_on = 'train' # One of {'val','test','train'}
+region_eval_on = 'val' # One of {'val','test','train'}
 region_model_to_eval = region_model + '-' + '77500'
 
 region_attribute_scores_dirname = os.path.join(
@@ -203,14 +203,20 @@ answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter)
 
 # Answer eval params
 answer_eval_on = 'val'
-answer_model_to_eval = answer_model + '-39000'
+answer_model_to_eval = answer_model + '-45000'
 
-answer_eval_data_json = os.path.join(
+vqa_results_dir = os.path.join(
     answer_output_dir,
+    'Results')
+
+mkdir_if_not_exists(vqa_results_dir)
+
+answer_eval_data_json = os.path.join(
+    vqa_results_dir,
     'eval_' + answer_eval_on + '_data.json')
 
 answer_eval_results_json = os.path.join(
-    answer_output_dir,
+    vqa_results_dir,
     'eval_' + answer_eval_on + '_results.json')
 
 # Select best model
@@ -220,3 +226,12 @@ step_size = 2000
 model_accuracies_txt = os.path.join(
     answer_output_dir,
     'model_accuracies.txt')
+
+# Fine Grained Evaluation File paths
+raw_vqa_val_ques_json = os.path.join(
+    vqa_basedir,
+    'MultipleChoice_mscoco_val2014_questions.json')
+
+raw_vqa_val_anno_json = os.path.join(
+    vqa_basedir,
+    'mscoco_val2014_annotations.json')
diff --git a/object_attribute_classifier_cached_features/eval.py b/object_attribute_classifier_cached_features/eval.py
index 1aae220..4d5cb0d 100644
--- a/object_attribute_classifier_cached_features/eval.py
+++ b/object_attribute_classifier_cached_features/eval.py
@@ -293,7 +293,8 @@ if __name__=='__main__':
     initializer = create_initializer(
         graph, 
         sess, 
-        constants.region_model_to_eval)
+        constants.answer_model_to_eval)
+        # constants.region_model_to_eval)
 
     print 'Creating feed dict creator...'
     feed_dict_creator = train.create_feed_dict_creator(graph.plh)
diff --git a/tftools/train.py b/tftools/train.py
new file mode 100644
index 0000000..6581392
--- /dev/null
+++ b/tftools/train.py
@@ -0,0 +1,144 @@
+""" This module defines a helper class for multiple rate optimizers in TensorFlow
+The MultiRateOptimizer class provides a slightly simpler version of the tf.train.Optimizer API
+ - compute_gradients(loss)
+ - apply_gradients(grads_and_vars, global_step)
+ - minimize(loss, global_step)
+To initialize:
+ - MultiRateOptimizer([default_optimizer=tf.train.GradientDescentOptimizer])
+To add variables and corresponding optimizers use add_variables as
+ - add_variables(variables, optimizer)
+ - add_variables(variables, learning_rate, [other_params])
+Usage 1:
+    a = tf.Variable(1)
+    b = tf.Variable(2)
+    c = tf.Variable(3)
+    loss = f(a,b,c)
+    optimizer = MultiRateOptimizer(tf.train.GradientDescentOptimizer)
+    optimizer.add_variables([a,b], learning_rate=.1)
+    optimizer.add_variables([c], learning_rate=.01)
+    min_opt = optimizer.minimize(loss)
+    min_opt.eval()
+Usage 2:
+    a = tf.Variable(1)
+    b = tf.Variable(2)
+    c = tf.Variable(3)
+    loss = f(a,b,c)
+    optimizer = MultiRateOptimizer()
+    optimizer.add_variables([a,b], tf.train.GradientDescentOptimizer(.1))
+    optimizer.add_variables([c], tf.train.GradientDescentOptimizer(.01))
+    min_opt = optimizer.minimize(loss)
+    min_opt.eval()
+"""
+
+import tensorflow as tf
+import itertools
+
+
+class MultiRateOptimizer():
+    """ Class for managing a multi-rate optimization problem """
+
+    def __init__(self, default_optimizer=None):
+        self.optimizers = []
+        self.variables = []
+        self.default_optimizer = default_optimizer
+
+    def check_variables(self, variables):
+        """ Checks variables against the already added variables and returns a list
+        of reused variables.
+        variables (list of tf.variables): The variables to check
+        return (list of tf.variables): The variables which are already known to this optimizer.
+        """
+        in_variables = set(itertools.chain(*self.variables))
+
+        dupes = []
+        for v in variables:
+            if v in in_variables:
+                dupes.append(v)
+
+        return dupes
+
+    def add_variables(self,
+                      variables,
+                      optimizer=None,
+                      learning_rate=None,
+                      other_params={}):
+        """ Adds Variables and optimizers with different parameters.
+        variables (list of tf.variables): the variables to optimize wrt.
+        Either:
+        optimizer (tf.train.Optimizer): The corresponding optimizer.
+        Or:
+        learning_rate (float): A learning rate to pass to the default_optimizer
+        other_params (dict): A dictionary of param_name, value to pass to the default optimizer
+        """
+
+        print 'test'
+        chck_vars = self.check_variables(variables)
+        if len(chck_vars) != 0:
+            raise ValueError('Expected all new variables, got overlap', *
+                             [v.name for v in chck_vars])
+        assert (len(self.check_variables(variables)) == 0)
+        self.variables.append(variables)
+
+        if (optimizer is not None):
+            self.optimizers.append(optimizer)
+        else:
+            if self.default_optimizer is None:
+                raise ValueError(
+                    'default_optimizer is None',
+                    'When optimizer is not passed to add_variables, expect default_optimizer to be not None')
+
+            self.optimizers.append(
+                self.default_optimizer(learning_rate, **other_params))
+
+        return self
+
+    def compute_gradients(self, loss):
+        """ Computes gradients of loss for the variables added to this object.
+        This is the first part of minimize().  It returns a list of lists of
+        (gradient, variable) pairs where "gradient" is the gradient for "variable".
+        Args:
+        - loss: A Tensor containing the value to minimize
+        Returns:
+        A list of (gradient, variable) pairs
+        """
+
+        all_vars = list(itertools.chain(*self.variables))
+        gradients = tf.gradients(loss, all_vars)
+        gradient_vars = zip(gradients, all_vars)
+        shape_grad = []
+        idx_grad = 0
+        for vars in self.variables:
+            shape_grad.append(gradient_vars[idx_grad:idx_grad + len(vars)])
+            idx_grad += len(vars)
+        return shape_grad
+
+    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+        """ Apply gradients to variables according to the optimizers.
+        Args:
+         - grads_and_vars: list of lists of (gradient, variable) pairs as returned by compute_gradients
+         - global_step: Optional Variable to increment by one after the variables have been updated
+         - name: Optional name for the returned operation.
+        Returns:
+        An operation that applies the specified gradients.  If global_step was not None, operation
+        increments it also.
+        """
+
+        assert (len(self.optimizers) == len(grads_and_vars))
+
+        apply_grad_list = []
+        for optimizer, grad_and_var in zip(self.optimizers, grads_and_vars):
+            apply_grad_list.append(optimizer.apply_gradients(grad_and_var))
+
+        if global_step is not None:
+            apply_grad_list.append(global_step.assign_add(1))
+
+        return tf.group(*apply_grad_list, name=name)
+
+    def minimize(self, loss, global_step=None, name=None):
+        """
+        Add operations to minimize loss by updating the variables added
+        with add_variables.
+        The method combines compute_gradients() and apply_gradients().
+        """
+        gradients = self.compute_gradients(loss)
+        return self.apply_gradients(gradients, global_step, name=name)
diff --git a/vqa_parser.py b/vqa_parser.py
index 4af8fb8..3446d23 100644
--- a/vqa_parser.py
+++ b/vqa_parser.py
@@ -240,6 +240,37 @@ def list_of_val_question_ids(
         ujson.dump(qids, file)
 
 
+def counts_of_question_objects_and_attributes(
+        json_anno,
+        question_nouns_json,
+        question_adjectives_json):
+
+    with open(json_anno, 'r') as file:
+        anno_data = ujson.load(file)
+
+    nouns = dict()
+    adjectives = dict()
+    for question_data in anno_data.values():
+        for noun in question_data['question_nouns']:
+            if noun not in nouns:
+                nouns[noun] = 0
+            nouns[noun] += 1
+
+        for adjective in question_data['question_adjectives']:
+            if adjective not in adjectives:
+                adjectives[adjective] = 0
+            adjectives[adjective] += 1
+
+    sorted_nouns = sorted(nouns.items(), key=lambda x: x[1], reverse=True)
+    sorted_adjectives = sorted(adjectives.items(), key=lambda x: x[1], reverse=True)
+
+    with open(question_nouns_json, 'w') as file:
+        ujson.dump(sorted_nouns, file, indent=4)
+
+    with open(question_adjectives_json, 'w') as file:
+        ujson.dump(sorted_adjectives, file, indent=4)
+        
+
 if __name__=='__main__':
     datadir = '/home/ssd/VQA/'
     mode = 'val'
@@ -275,7 +306,14 @@ if __name__=='__main__':
         datadir,
         'answer_vocab.json')
 
-    
+    nouns_json_filename = os.path.join(
+        datadir,
+        'mscoco_' + mode + '2014_question_nouns.json')
+
+    adjectives_json_filename = os.path.join(
+        datadir,
+        'mscoco_' + mode + '2014_question_adjectives.json')
+
     ans_vocab_size = 5000
 
     # dump_questions_to_txt(
@@ -308,32 +346,36 @@ if __name__=='__main__':
     #     question_ids_txt_filename,
     #     annotations_with_parsed_questions_filename)
 
-    if mode=='train':
+    # if mode=='train':
         # create_ans_vocab(
         #     ans_vocab_size, 
         #     annotations_with_parsed_questions_filename, 
         #     answer_vocab_filename)
 
-        train_held_out_qids_json = os.path.join(
-            datadir,
-            'train_held_out_qids.json')
+        # train_held_out_qids_json = os.path.join(
+        #     datadir,
+        #     'train_held_out_qids.json')
 
-        train_subset_qids_json = os.path.join(
-            datadir,
-            'train_subset_qids.json')
+        # train_subset_qids_json = os.path.join(
+        #     datadir,
+        #     'train_subset_qids.json')
 
-        list_of_train_question_ids(
-            annotations_with_parsed_questions_filename,
-            0.05,
-            train_held_out_qids_json,
-            train_subset_qids_json)
+        # list_of_train_question_ids(
+        #     annotations_with_parsed_questions_filename,
+        #     0.05,
+        #     train_held_out_qids_json,
+        #     train_subset_qids_json)
         
-    if mode=='val':
-        val_qids_json = os.path.join(
-            datadir,
-            'val_qids.json')
-
-        list_of_val_question_ids(
-            annotations_with_parsed_questions_filename,
-            val_qids_json)
+    # if mode=='val':
+        # val_qids_json = os.path.join(
+        #     datadir,
+        #     'val_qids.json')
+
+        # list_of_val_question_ids(
+        #     annotations_with_parsed_questions_filename,
+        #     val_qids_json)
         
+    counts_of_question_objects_and_attributes(
+        annotations_with_parsed_questions_filename,
+        nouns_json_filename,
+        adjectives_json_filename)
-- 
GitLab