diff --git a/.gitignore b/.gitignore
index a0a86f1a56a361c890e98387795ecbf1dff55e36..3f430ed162111ffc8a8d29dc41df0ea57973afc6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,6 @@
 *.pyc
 shapes_dataset/images_old
 shapes_dataset/images
-shapes_dataset/*.json
\ No newline at end of file
+shapes_dataset/*.json
+shapes_dataset/*.txt
+question_parser/stanford-parser-full-2015-12-09/*
\ No newline at end of file
diff --git a/question_parser/ParseQuestions.class b/question_parser/ParseQuestions.class
new file mode 100644
index 0000000000000000000000000000000000000000..cd3891bfbacedbd646c364be73a7b52a61d11902
Binary files /dev/null and b/question_parser/ParseQuestions.class differ
diff --git a/shapes_dataset/parse_all_questions.py b/shapes_dataset/parse_all_questions.py
new file mode 100644
index 0000000000000000000000000000000000000000..53467158df5a21df6785a35260113e8e0359d50e
--- /dev/null
+++ b/shapes_dataset/parse_all_questions.py
@@ -0,0 +1,119 @@
+"""Parse every annotated question with the Java question parser.
+
+Usage:
+    python parse_all_questions.py ANNO_JSON_FILE PARSED_Q_JSON_FILE
+
+Run it from the ``shapes_dataset`` directory: the parser is started in
+``../question_parser`` and the intermediate text files are created in the
+current working directory.
+"""
+import collections
+import os
+import json
+import subprocess
+import sys
+import pdb
+
+
+QUESTIONS_TXT = 'questions.txt'
+QUESTION_IDS_TXT = 'question_ids.txt'
+PARSED_QUESTIONS_TXT = 'parsed_questions.txt'
+PARSER_DIR = '../question_parser'
+NUM_BINS = 4
+
+
+def write_questions(anno_data):
+    """Write questions and their ids to two line-aligned text files."""
+    with open(QUESTIONS_TXT, 'w') as q_txt_file, \
+            open(QUESTION_IDS_TXT, 'w') as q_id_file:
+        for item in anno_data:
+            # The question is written unchanged; '?' is only removed later,
+            # from the parser output.
+            q_txt_file.write(item['question'] + '\n')
+            q_id_file.write(str(item['question_id']) + '\n')
+    print('Wrote %d questions to %s' % (len(anno_data), QUESTIONS_TXT))
+
+
+def run_parser():
+    """Run ParseQuestions on the questions file and save its stdout.
+
+    Raises:
+        subprocess.CalledProcessError: if java exits with a non-zero status.
+    """
+    command = [
+        'java', '-mx1000m',
+        # No shell is involved; java expands the trailing '*' itself.
+        '-cp', '.:./stanford-parser-full-2015-12-09/*',
+        'ParseQuestions',
+        os.path.abspath(QUESTIONS_TXT),
+    ]
+    with open(PARSED_QUESTIONS_TXT, 'w') as parsed_q_file:
+        subprocess.check_call(command, cwd=PARSER_DIR, stdout=parsed_q_file)
+
+
+def parse_line(parsed_q):
+    """Split one line of parser output into its '|'-separated bins.
+
+    Raises:
+        ValueError: if the line holds fewer than NUM_BINS fields.
+    """
+    # NOTE(review): the first two and last two characters are dropped
+    # unconditionally; presumably they are delimiters plus the newline
+    # emitted by ParseQuestions -- confirm against its output format.
+    fields = parsed_q[2:-2].replace('?', '').split('|')
+    if len(fields) < NUM_BINS:
+        raise ValueError(
+            'expected %d bins, got %d in %r'
+            % (NUM_BINS, len(fields), parsed_q))
+    return {
+        'bin%d' % k: fields[k].strip().split(' ')
+        for k in range(NUM_BINS)
+    }
+
+
+def load_parsed_questions():
+    """Pair every parsed line with its question id.
+
+    Raises:
+        ValueError: if the parser output and the id file differ in length.
+    """
+    with open(PARSED_QUESTIONS_TXT, 'r') as parsed_q_file:
+        parsed_questions = parsed_q_file.readlines()
+    with open(QUESTION_IDS_TXT, 'r') as q_id_file:
+        question_ids = q_id_file.readlines()
+
+    # Lines are matched by position, so a length mismatch would attach
+    # parses to the wrong question ids.
+    if len(parsed_questions) != len(question_ids):
+        raise ValueError(
+            'parser produced %d lines for %d questions'
+            % (len(parsed_questions), len(question_ids)))
+
+    return [
+        {
+            # int() ignores the trailing newline.
+            'question_id': int(question_id),
+            'question_parse': parse_line(parsed_q),
+        }
+        for question_id, parsed_q in zip(question_ids, parsed_questions)
+    ]
+
+
+def main(argv):
+    """Parse the questions of argv[1] and dump the result to argv[2]."""
+    if len(argv) < 3:
+        sys.exit('usage: %s ANNO_JSON_FILE PARSED_Q_JSON_FILE' % argv[0])
+    anno_json_file, parsed_q_json_file = argv[1], argv[2]
+
+    with open(anno_json_file, 'r') as anno_file:
+        anno_data = json.load(anno_file)
+
+    write_questions(anno_data)
+    run_parser()
+
+    with open(parsed_q_json_file, 'w') as out_file:
+        json.dump(load_parsed_questions(), out_file, indent=4)
+
+
+if __name__ == '__main__':
+    main(sys.argv)