Skip to content
Snippets Groups Projects
Commit edb072c0 authored by tgupta6's avatar tgupta6
Browse files

parse questions and write to json file

parent f79fea89
No related branches found
No related tags found
No related merge requests found
......@@ -2,4 +2,6 @@
*.pyc
shapes_dataset/images_old
shapes_dataset/images
shapes_dataset/*.json
\ No newline at end of file
shapes_dataset/*.json
shapes_dataset/*.txt
question_parser/stanford-parser-full-2015-12-09/*
\ No newline at end of file
File added
import collections
import os
import json
import sys
import pdb
def parse_question_bins(parsed_q):
    """Split one line of parser output into its four word bins.

    The line is trimmed by two characters at each end (presumably the
    delimiters emitted by ParseQuestions plus the trailing newline --
    TODO confirm against the Java source), every '?' is removed, and the
    remainder is split on '|'. Each of the first four fields is stripped
    of surrounding whitespace and split on single spaces.

    Args:
        parsed_q: One raw line read from parsed_questions.txt.

    Returns:
        A dict with keys 'bin0'..'bin3', each mapping to a list of tokens.

    Raises:
        ValueError: If the line has fewer than four '|'-separated fields.
    """
    fields = parsed_q[2:-2].replace('?', '').split('|')
    if len(fields) < 4:
        raise ValueError(
            'expected at least 4 "|"-separated bins, got %d in line %r'
            % (len(fields), parsed_q))
    # split(' ') (not split()) is kept on purpose so the token lists are
    # identical to what the script has always produced.
    return {
        'bin0': fields[0].strip().split(' '),
        'bin1': fields[1].strip().split(' '),
        'bin2': fields[2].strip().split(' '),
        'bin3': fields[3].strip().split(' '),
    }


def build_parsed_records(question_id_lines, parsed_lines):
    """Pair every question id with the parse of the same-numbered line.

    Args:
        question_id_lines: Lines of question_ids.txt, one integer id each.
        parsed_lines: Lines of parsed_questions.txt, in the same order.

    Returns:
        A list of {'question_id': int, 'question_parse': {...}} dicts.

    Raises:
        ValueError: If the two inputs differ in length (the parser output
            would otherwise be attributed to the wrong questions), or if a
            line cannot be parsed.
    """
    if len(question_id_lines) != len(parsed_lines):
        raise ValueError(
            'got %d question ids but %d parsed questions'
            % (len(question_id_lines), len(parsed_lines)))
    return [
        {
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last digit.
            'question_id': int(id_line.strip()),
            'question_parse': parse_question_bins(parsed_line),
        }
        for id_line, parsed_line in zip(question_id_lines, parsed_lines)
    ]


def main(argv):
    """Parse every annotated question and write the result as JSON.

    Must be run from the shapes_dataset directory: the Java parser is
    started in ../question_parser and reads
    ./../shapes_dataset/questions.txt from there.

    Args:
        argv: Command line; argv[1] is the annotation JSON to read and
            argv[2] is the JSON file to write.
    """
    # Imported here because this is the only place that needs it.
    import subprocess

    if len(argv) < 3:
        sys.exit('usage: %s ANNO_JSON_FILE PARSED_Q_JSON_FILE' % argv[0])
    anno_json_file = argv[1]
    parsed_q_json_file = argv[2]

    with open(anno_json_file, 'r') as anno_file:
        anno_data = json.load(anno_file)

    # Questions are written unchanged, one per line, with the ids in a
    # parallel file; '?' is only removed later, from the parser output.
    with open('questions.txt', 'w') as q_txt_file, \
            open('question_ids.txt', 'w') as q_id_file:
        for item in anno_data:
            q_txt_file.write(item['question'] + '\n')
            q_id_file.write(str(item['question_id']) + '\n')
    print('%d questions.txt' % len(anno_data))

    # Run the parser without a shell; check_call raises if java fails so a
    # broken run cannot silently produce an empty or partial result.
    with open('parsed_questions.txt', 'w') as parsed_out:
        subprocess.check_call(
            [
                'java', '-mx1000m',
                '-cp', '.:./stanford-parser-full-2015-12-09/*',
                'ParseQuestions',
                './../shapes_dataset/questions.txt',
            ],
            cwd='../question_parser',
            stdout=parsed_out)

    with open('parsed_questions.txt', 'r') as parsed_q_file:
        parsed_questions = parsed_q_file.readlines()
    with open('question_ids.txt', 'r') as parsed_q_id_file:
        question_ids = parsed_q_id_file.readlines()

    parsed_q_json_data = build_parsed_records(question_ids, parsed_questions)

    with open(parsed_q_json_file, 'w') as out_file:
        json.dump(parsed_q_json_data, out_file, indent=4)


if __name__ == '__main__':
    main(sys.argv)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment