    # constants_crunchy.py
    import os
    import pdb
    
    def mkdir_if_not_exists(dir_name):
        """Create the directory ``dir_name`` unless it already exists.

        Missing parent directories are created as well. A path that shows up
        between the creation attempt and the check (for instance because
        another process created it at the same time) is not treated as an
        error.

        Args:
            dir_name: Path of the directory to create.

        Raises:
            OSError: If the directory could not be created and the path still
                does not exist afterwards.
        """
        try:
            os.makedirs(dir_name)
        except OSError:
            # Attempt first, check afterwards: this avoids the window between
            # an existence test and the creation. As before, an already
            # existing path is silently accepted.
            if not os.path.exists(dir_name):
                raise
            
    
    # Experiment name; it is also the name of the experiment's directory
    # below global_output_dir.
    # (commented-out alternative: 'QA_joint_pretrain_genome_split')
    experiment_name = 'QA_classifier_wordvec_xform'

    # Root output directory under which all subexperiments are saved
    global_output_dir = '/home/tanmay/Code/GenVQA/Exp_Results/VQA'

    # <global_output_dir>/<experiment_name>
    global_experiment_dir = os.path.join(global_output_dir, experiment_name)

    # <global_experiment_dir>/tensorboard_logdir
    tb_log_dir = os.path.join(global_experiment_dir, 'tensorboard_logdir')

    # The directories are created at import time, outermost first
    mkdir_if_not_exists(global_output_dir)
    mkdir_if_not_exists(global_experiment_dir)
    mkdir_if_not_exists(tb_log_dir)
    
    # Height and width to which images are resized before being fed into
    # the networks
    image_size = (224, 224)

    # Token to be used if an object or attribute variable is unknown
    unknown_token = 'UNK'
    
    # Genome data paths -- everything below lives under data_absolute_path
    data_absolute_path = '/home/ssd/VisualGenome'

    # Image directory and the matching ResNet feature directory
    image_dir = os.path.join(data_absolute_path, 'cropped_regions_large')
    genome_resnet_feat_dir = os.path.join(
        data_absolute_path, 'cropped_regions_large_resnet_features')

    # Files inside the 'restructured' sub-directory
    object_labels_json = os.path.join(
        data_absolute_path, 'restructured/object_labels.json')
    attribute_labels_json = os.path.join(
        data_absolute_path, 'restructured/attribute_labels.json')
    regions_json = os.path.join(
        data_absolute_path, 'restructured/region_with_labels.json')
    mean_image_filename = os.path.join(
        data_absolute_path, 'restructured/mean_image.jpg')
    vocab_json = os.path.join(
        data_absolute_path, 'restructured/vocab_subset.json')

    # Region id files, one per split (train subset / train held out / test)
    genome_train_subset_region_ids = os.path.join(
        data_absolute_path, 'restructured/train_subset_region_ids.json')
    genome_train_held_out_region_ids = os.path.join(
        data_absolute_path, 'restructured/train_held_out_region_ids.json')
    genome_test_region_ids = os.path.join(
        data_absolute_path, 'restructured/test_region_ids.json')

    # Number of object labels and of attribute labels
    num_object_labels = 1000
    num_attribute_labels = 1000
    
    # Regions data partition
    # First 80% meant to be used for training
    # Next 10% is set aside for validation
    # Last 10% is to be used for testing
    
    # num_total_regions = 1951768
    # num_train_regions = 1561416 # First 80%
    # num_val_regions = 195176 # Next 10%
    # num_test_regions = num_total_regions \
    #                    - num_train_regions \
    #                    - num_val_regions 
    
    
    # Checkpoint file of the pretrained ResNet
    resnet_ckpt = (
        '/home/tanmay/Downloads/pretrained_networks/'
        'Resnet/tensorflow-resnet-pretrained-20160509/'
        'ResNet-L50.ckpt'
    )

    # Binary file with the pretrained word vectors
    word2vec_binary = (
        '/home/tanmay/Code/word2vec/word2vec-api-master/'
        'GoogleNews-vectors-negative300.bin'
    )

    # Sizes of a word vector and of a ResNet feature
    word_vector_size = 300
    resnet_feat_dim = 2048
    
    # .npy file holding the matrix of vocabulary word vectors
    pretrained_vocab_word_vectors_npy = os.path.join(
        data_absolute_path, 'restructured/pretrained_vocab_word_vectors.npy')
    
    # Object Attribute Classifier Training Params
    # (plain numeric settings; the code that consumes them is not part of
    # this file)
    region_batch_size = 200
    region_num_epochs = 6
    region_queue_size = 400
    region_regularization_coeff = 1e-5
    region_log_every_n_iter = 500
    # <global_experiment_dir>/object_attribute_classifiers, created at import
    region_output_dir = os.path.join(
        global_experiment_dir, 'object_attribute_classifiers')
    mkdir_if_not_exists(region_output_dir)

    # Path prefix 'model' inside region_output_dir
    region_model = os.path.join(region_output_dir, 'model')
    
    # Object Attribute Finetuning Params
    region_fine_tune_from_iter = 50500

    # '<region_model>-<region_fine_tune_from_iter>'
    region_fine_tune_from = '{}-{}'.format(
        region_model, region_fine_tune_from_iter)
    
    
    # Object Attribute Model Selection
    region_start_model = 8000
    region_step_size = 8000

    # Text file 'model_accuracies.txt' inside region_output_dir
    region_model_accuracies_txt = os.path.join(
        region_output_dir, 'model_accuracies.txt')
    
    
    # Object Attribute Classifier Evaluation Params

    # Must be one of 'test', 'train_held_out' or 'train_subset'
    region_eval_on = 'train_held_out'

    # '<region_model>-102000'
    region_model_to_eval = '-'.join((region_model, '102000'))

    # <region_output_dir>/attribute_scores, created at import
    region_attribute_scores_dirname = os.path.join(
        region_output_dir, 'attribute_scores')
    mkdir_if_not_exists(region_attribute_scores_dirname)
    
    # Answer prediction
    num_region_proposals = 100
    num_mcq_candidates = 18
    # Every multiple-choice candidate except one (presumably the correct
    # answer -- TODO confirm against the code that uses this value)
    num_negative_answers = num_mcq_candidates - 1
    
    # VQA data paths -- everything below lives directly under vqa_basedir
    vqa_basedir = '/home/ssd/VQA/'

    # Train split: image directory, ResNet feature directory, annotations
    vqa_train_image_dir = os.path.join(
        vqa_basedir,
        'train2014_cropped_large')
    vqa_train_resnet_feat_dir = os.path.join(
        vqa_basedir,
        'train2014_cropped_large_resnet_features')
    vqa_train_anno = os.path.join(
        vqa_basedir,
        'mscoco_train2014_annotations_with_parsed_questions.json')

    # Question id files for the train subset and the train held-out set
    vqa_train_subset_qids = os.path.join(
        vqa_basedir,
        'train_subset_qids.json')
    vqa_train_held_out_qids = os.path.join(
        vqa_basedir,
        'train_held_out_qids.json')

    # Val split: image directory, ResNet feature directory, annotations,
    # question ids
    vqa_val_image_dir = os.path.join(
        vqa_basedir,
        'val2014_cropped_large')
    vqa_val_resnet_feat_dir = os.path.join(
        vqa_basedir,
        'val2014_cropped_large_resnet_features')
    vqa_val_anno = os.path.join(
        vqa_basedir,
        'mscoco_val2014_annotations_with_parsed_questions.json')
    vqa_val_qids = os.path.join(
        vqa_basedir,
        'val_qids.json')

    # Answer vocabulary file
    vqa_answer_vocab_json = os.path.join(
        vqa_basedir,
        'answer_vocab.json')
    
    # VQA dataset params (currently commented out)

    # num_train_questions = 248349
    # num_train_held_out_questions = 12500
    # num_train_subset_questions = num_train_questions - num_train_held_out_questions
    # num_val_questions = 121512
    # num_val_subset_questions = 10000
    # num_test_questions = 0
    
    
    # Answer classifier training params
    # (plain numeric settings; the code that consumes them is not part of
    # this file)
    answer_batch_size = 50
    answer_offset = 0
    answer_obj_atr_loss_wt = 0.1
    answer_regularization_coeff = 1e-5
    answer_queue_size = 500
    answer_embedding_dim = 600
    answer_log_every_n_iter = 500
    # <global_experiment_dir>/answer_classifiers, created at import
    answer_output_dir = os.path.join(global_experiment_dir, 'answer_classifiers')
    mkdir_if_not_exists(answer_output_dir)
    
    
    # Path 'model-102000' under the object_attribute_classifiers directory of
    # the 'object_attribute_classifier_wordvec_xform' experiment.
    # The pieces are joined inside parentheses so that a blank line between
    # them cannot cut the expression short, as it does after a backslash.
    pretrained_model = (
        '/home/tanmay/Code/GenVQA/Exp_Results/VQA/' +
        'object_attribute_classifier_wordvec_xform/' +
        'object_attribute_classifiers/model-102000'
    )
    
    
    # Path prefix 'model' inside answer_output_dir
    answer_model = os.path.join(answer_output_dir, 'model')

    # Answer classifier additional joint training params
    num_regions_with_labels = 100

    # Answer fine tune params
    answer_fine_tune_from_iter = 13000

    # '<answer_model>-<answer_fine_tune_from_iter>'
    answer_fine_tune_from = '{}-{}'.format(
        answer_model, answer_fine_tune_from_iter)
    
    # Answer eval params

    # Name of the split to evaluate on; it is embedded in the eval file names
    answer_eval_on = 'val'

    # '<answer_model>-42000'
    answer_model_to_eval = answer_model + '-42000'

    # <answer_output_dir>/Results, created at import
    vqa_results_dir = os.path.join(
        answer_output_dir,
        'Results')

    mkdir_if_not_exists(vqa_results_dir)

    # <vqa_results_dir>/eval_<answer_eval_on>_data.json
    answer_eval_data_json = os.path.join(
        vqa_results_dir,
        'eval_' + answer_eval_on + '_data.json')
    
    
    # <vqa_results_dir>/eval_<answer_eval_on>_results.json
    # The directory component is passed explicitly; without it os.path.join
    # returns only the bare file name, i.e. a path relative to the current
    # working directory.
    answer_eval_results_json = os.path.join(
        vqa_results_dir,
        'eval_' + answer_eval_on + '_results.json')
    # Select best model

    # Same directory as answer_output_dir
    models_dir = answer_output_dir
    step_size = 2000

    # Text file 'model_accuracies.txt' inside answer_output_dir
    model_accuracies_txt = os.path.join(
        answer_output_dir,
        'model_accuracies.txt')
    
    
    # Fine Grained Evaluation File paths (both directly under vqa_basedir)
    raw_vqa_val_ques_json = os.path.join(
        vqa_basedir, 'MultipleChoice_mscoco_val2014_questions.json')
    raw_vqa_val_anno_json = os.path.join(
        vqa_basedir, 'mscoco_val2014_annotations.json')