diff --git a/answer_classifier_cached_features/train.py b/answer_classifier_cached_features/train.py index 8f383c6cc6155ef6001094aceba52d309ad0590b..03b406832208af20ccca9425a4f612c44906730f 100644 --- a/answer_classifier_cached_features/train.py +++ b/answer_classifier_cached_features/train.py @@ -3,6 +3,7 @@ import object_attribute_classifier_cached_features.inference as feature_graph import region_relevance_network.inference as relevance_graph import answer_classifier_cached_features.inference as answer_graph from tftools import var_collect, placeholder_management +import tftools.train as multi_rate_train import tftools.data import losses import constants @@ -529,7 +530,9 @@ class attach_optimizer(): self.lr = lr with graph.tf_graph.as_default(): all_trainable_vars = tf.trainable_variables() - self.not_to_train = []#+ graph.object_attribute_vars + + self.not_to_train = graph.object_attribute_vars + graph.word_vec_vars + vars_to_train = [ var for var in all_trainable_vars if var not in self.not_to_train] @@ -541,19 +544,35 @@ class attach_optimizer(): all_vars = tf.all_variables() self.ops = dict() + + self.optimizer = multi_rate_train.MultiRateOptimizer( + tf.train.AdamOptimizer) - self.add_adam_optimizer( - graph.total_loss, + self.optimizer.add_variables( + self.graph.object_attribute_vars + self.graph.word_vec_vars, + learning_rate = 0.1*self.lr) + + + self.optimizer.add_variables( vars_to_train, - 'optimizer') + learning_rate = self.lr) - self.train_op = self.group_all_train_ops() + self.train_op = self.optimizer.minimize(graph.total_loss) + + # self.add_adam_optimizer( + # graph.total_loss, + # vars_to_train, + # 'optimizer') + + # self.train_op = self.group_all_train_ops() + all_vars_with_opt_vars = tf.all_variables() self.opt_vars = [var for var in all_vars_with_opt_vars if var not in all_vars] def filter_out_vars_to_train(self, var_list): return [var for var in var_list if var not in self.not_to_train] + def add_adam_optimizer(self, loss, var_list, name): var_list = self.filter_out_vars_to_train(var_list) if not var_list: diff --git a/constants_crunchy.py b/constants_crunchy.py index 7391777dbf87ba0c712520edd97ac001f1ee51b3..5046e0562dd18086514ec64fec623db221c37ec6 100644 --- a/constants_crunchy.py +++ b/constants_crunchy.py @@ -203,7 +203,7 @@ answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter) # Answer eval params answer_eval_on = 'val' -answer_model_to_eval = answer_model + '-45000' +answer_model_to_eval = answer_model + '-18500' vqa_results_dir = os.path.join( answer_output_dir, diff --git a/constants_vision_gpu_1.py b/constants_vision_gpu_1.py new file mode 100644 index 0000000000000000000000000000000000000000..99378eb64e9666cef73dcd0eaf6d3d338f24b499 --- /dev/null +++ b/constants_vision_gpu_1.py @@ -0,0 +1,219 @@ +import os +import pdb + +def mkdir_if_not_exists(dir_name): + if not os.path.exists(dir_name): + os.mkdir(dir_name) + +experiment_name = 'QA_explicit_dot_joint_training_pretrained_fix_pretrained' +#experiment_name = 'object_attribute_classifier_large_images' +# Global output directory (all subexperiments will be saved here) +global_output_dir = '/data/tanmay/GenVQA_Exp_Results' + +global_experiment_dir = os.path.join( + global_output_dir, + experiment_name) + +tb_log_dir = os.path.join( + global_experiment_dir, + 'tensorboard_logdir') + +mkdir_if_not_exists(global_output_dir) +mkdir_if_not_exists(global_experiment_dir) +mkdir_if_not_exists(tb_log_dir) + +#height and width to which images are resized before feeding into networks +image_size = (224, 224) + +# Token to be used if object or attribute variable is unknown +unknown_token = 'UNK' + +# Genome Data paths +data_absolute_path = '/home/nfs/tgupta6/data/VisualGenome' + +image_dir = os.path.join(data_absolute_path, 'cropped_regions_large') +genome_resnet_feat_dir = os.path.join( + data_absolute_path, + 'cropped_regions_large_resnet_features') + +object_labels_json = os.path.join( + data_absolute_path, + 'restructured/object_labels.json') + +attribute_labels_json = os.path.join( + data_absolute_path, + 'restructured/attribute_labels.json') + +regions_json = os.path.join( + data_absolute_path, + 'restructured/region_with_labels.json') + +mean_image_filename = os.path.join( + data_absolute_path, + 'restructured/mean_image.jpg') + +vocab_json = os.path.join( + data_absolute_path, + 'restructured/vocab_subset.json') + +num_object_labels = 1000 +num_attribute_labels = 1000 + +# Regions data partition +# First 80% meant to be used for training +# Next 10% is set aside for validation +# Last 10% is to be used for testing +num_total_regions = 1951768 +num_train_regions = 1561416 # First 80% +num_val_regions = 195176 # Next 10% +num_test_regions = num_total_regions \ + - num_train_regions \ + - num_val_regions + +# Pretrained resnet ckpt +resnet_ckpt = '/home/nfs/tgupta6/data/Resnet/' + \ + 'ResNet-L50.ckpt' + +# Pretrained word vectors +word2vec_binary = '/home/nfs/tgupta6/data/word_vectors/' + \ + 'GoogleNews-vectors-negative300.bin' + +word_vector_size = 300 +resnet_feat_dim = 2048 + +# Numpy matrix storing vocabulary word vectors +pretrained_vocab_word_vectors_npy = os.path.join( + data_absolute_path, + 'restructured/pretrained_vocab_word_vectors.npy') + +# Object Attribute Classifier Training Params +region_batch_size = 200 +region_num_samples = num_train_regions +region_num_epochs = 4 +region_offset = 0 +region_queue_size = 400 +region_regularization_coeff = 1e-4 +region_lr = 1e-3 +region_log_every_n_iter = 500 +region_output_dir = os.path.join( + global_experiment_dir, + 'object_attribute_classifiers') + +mkdir_if_not_exists(region_output_dir) + +region_model = os.path.join( + region_output_dir, + 'model') + +# Object Attribute Finetuning Params +region_fine_tune_from_iter = 3000 +region_fine_tune_from = region_model + '-' + str(region_fine_tune_from_iter) + +# Object Attribute Classifier Evaluation Params +region_eval_on = 'val' # One of {'val','test','train'} +region_model_to_eval = region_model + '-' + '77500' + +region_attribute_scores_dirname = os.path.join( + region_output_dir, + 'attribute_scores') + +mkdir_if_not_exists(region_attribute_scores_dirname) + +# Answer prediction +num_region_proposals = 100 +num_mcq_candidates = 18 +num_negative_answers = num_mcq_candidates - 1 + +# VQA data paths +vqa_basedir = '/home/nfs/tgupta6/data/VQA/' + +vqa_train_image_dir = os.path.join( + vqa_basedir, + 'train2014_cropped_large') +vqa_train_resnet_feat_dir = os.path.join( + vqa_basedir, + 'train2014_cropped_large_resnet_features') +vqa_train_anno = os.path.join( + vqa_basedir, + 'mscoco_train2014_annotations_with_parsed_questions.json') +vqa_train_subset_qids = os.path.join( + vqa_basedir, + 'train_subset_qids.json') +vqa_train_held_out_qids = os.path.join( + vqa_basedir, + 'train_held_out_qids.json') + +vqa_val_image_dir = os.path.join( + vqa_basedir, + 'val2014_cropped_large') +vqa_val_resnet_feat_dir = os.path.join( + vqa_basedir, + 'val2014_cropped_large_resnet_features') +vqa_val_anno = os.path.join( + vqa_basedir, + 'mscoco_val2014_annotations_with_parsed_questions.json') +vqa_val_qids = os.path.join( + vqa_basedir, + 'val_qids.json') + +vqa_answer_vocab_json = os.path.join( + vqa_basedir, + 'answer_vocab.json') + +# VQA dataset params +# num_train_questions = 248349 +# num_val_subset_questions = 10000 +# num_val_questions = 121512 +# num_val_rest_questions = num_val_questions - num_val_subset_questions +# num_test_questions = 0 + +# Answer classifier training params +answer_batch_size = 50 +answer_num_epochs = 10 +answer_offset = 0 +answer_obj_atr_loss_wt = 0.1 +answer_regularization_coeff = 1e-5 +answer_queue_size = 500 +answer_embedding_dim = 600 +answer_lr = 1e-4 +answer_log_every_n_iter = 500 +answer_output_dir = os.path.join( + global_experiment_dir, + 'answer_classifiers') + +mkdir_if_not_exists(answer_output_dir) + +pretrained_model = '/home/nfs/tgupta6/projects/GenVQA/Exp_Results/' +\ + 'pretrained_object_attribute_classifier/' +\ + 'obj_atr_model_77500' + +answer_model = os.path.join( + answer_output_dir, + 'model') + +# Answer classifier additional joint training params +num_regions_with_labels = 100 + +# Answer fine tune params +answer_fine_tune_from_iter = 22500 +answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter) + +# Answer eval params +answer_eval_on = 'val' +answer_model_to_eval = answer_model + '-69500' + +answer_eval_data_json = os.path.join( + answer_output_dir, + 'eval_' + answer_eval_on + '_data.json') + +answer_eval_results_json = os.path.join( + answer_output_dir, + 'eval_' + answer_eval_on + '_results.json') + +# Select best model +models_dir = answer_output_dir +start_model = 1000 +step_size = 2000 +model_accuracies_txt = os.path.join( + answer_output_dir, + 'model_accuracies.txt') diff --git a/constants_vision_gpu_2.py b/constants_vision_gpu_2.py index c19799a77654f0250e010c98b083580367aa63fe..aa195ba13d2ec704bef230a185a42b23a028c1f6 100644 --- a/constants_vision_gpu_2.py +++ b/constants_vision_gpu_2.py @@ -5,7 +5,7 @@ def mkdir_if_not_exists(dir_name): if not os.path.exists(dir_name): os.mkdir(dir_name) -experiment_name = 'QA_explicit_dot_joint_training_pretrained_multi_rate' +experiment_name = 'QA_explicit_dot_joint_training_pretrained_fix_pretrained' #experiment_name = 'object_attribute_classifier_large_images' # Global output directory (all subexperiments will be saved here) global_output_dir = '/data/tanmay/GenVQA_Exp_Results' @@ -110,7 +110,7 @@ region_fine_tune_from_iter = 3000 region_fine_tune_from = region_model + '-' + str(region_fine_tune_from_iter) # Object Attribute Classifier Evaluation Params -region_eval_on = 'train' # One of {'val','test','train'} +region_eval_on = 'val' # One of {'val','test','train'} region_model_to_eval = region_model + '-' + '77500' region_attribute_scores_dirname = os.path.join( @@ -136,6 +136,12 @@ vqa_train_resnet_feat_dir = os.path.join( vqa_train_anno = os.path.join( vqa_basedir, 'mscoco_train2014_annotations_with_parsed_questions.json') +vqa_train_subset_qids = os.path.join( + vqa_basedir, + 'train_subset_qids.json') +vqa_train_held_out_qids = os.path.join( + vqa_basedir, + 'train_held_out_qids.json') vqa_val_image_dir = os.path.join( vqa_basedir, @@ -146,6 +152,9 @@ vqa_val_resnet_feat_dir = os.path.join( vqa_val_anno = os.path.join( vqa_basedir, 'mscoco_val2014_annotations_with_parsed_questions.json') +vqa_val_qids = os.path.join( + vqa_basedir, + 'val_qids.json') vqa_answer_vocab_json = os.path.join( vqa_basedir, @@ -166,8 +175,7 @@ answer_obj_atr_loss_wt = 0.1 answer_regularization_coeff = 1e-5 answer_queue_size = 500 answer_embedding_dim = 600 -obj_atr_lr = 1e-4 -answer_lr = 1e-3 +answer_lr = 1e-4 answer_log_every_n_iter = 500 answer_output_dir = os.path.join( global_experiment_dir, @@ -187,12 +195,12 @@ answer_model = os.path.join( num_regions_with_labels = 100 # Answer fine tune params -answer_fine_tune_from_iter = 19500 +answer_fine_tune_from_iter = 18500 answer_fine_tune_from = answer_model + '-' + str(answer_fine_tune_from_iter) # Answer eval params answer_eval_on = 'val' -answer_model_to_eval = answer_model + '-13000' +answer_model_to_eval = answer_model + '-18500' answer_eval_data_json = os.path.join( answer_output_dir, diff --git a/object_attribute_classifier_cached_features/eval.py b/object_attribute_classifier_cached_features/eval.py index 4d5cb0d751f0983a9ec863897191e59a8c1054a8..48ba6b9d826a070f8d0b3353e97adafc4c9a6313 100644 --- a/object_attribute_classifier_cached_features/eval.py +++ b/object_attribute_classifier_cached_features/eval.py @@ -232,7 +232,7 @@ def eval( eval_vars_dict = { var_name: eval_var for var_name, eval_var in zip(vars_to_eval_names, eval_vars)} - # print batch['region_ids'] + # print batch['region_ids'] labels = dict() labels['objects'] = batch['object_labels'] labels['attributes'] = batch['attribute_labels']