From edb072c066e00b4fc519e96a63b332a9b26e7952 Mon Sep 17 00:00:00 2001
From: tgupta6 <tgupta6@illinois.edu>
Date: Mon, 25 Apr 2016 13:57:55 -0500
Subject: [PATCH] parse questions and write to json file

---
 .gitignore                            |   4 +-
 question_parser/ParseQuestions.class  | Bin 0 -> 5200 bytes
 shapes_dataset/parse_all_questions.py |  59 ++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 question_parser/ParseQuestions.class
 create mode 100644 shapes_dataset/parse_all_questions.py

diff --git a/.gitignore b/.gitignore
index a0a86f1..3f430ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,6 @@
 *.pyc
 shapes_dataset/images_old
 shapes_dataset/images
-shapes_dataset/*.json
\ No newline at end of file
+shapes_dataset/*.json
+shapes_dataset/*.txt
+question_parser/stanford-parser-full-2015-12-09/*
\ No newline at end of file
diff --git a/question_parser/ParseQuestions.class b/question_parser/ParseQuestions.class
new file mode 100644
index 0000000000000000000000000000000000000000..cd3891bfbacedbd646c364be73a7b52a61d11902
GIT binary patch
literal 5200
zcmb_g3wRXO75?u`lDnCmz_NKUAOcb)AtbAasDol53Su{FB#9WIk4bitEX+QbodrS>
zOY5uFcWo_Le4rQ~XnmB0m};q7+S*#}`~B3mzS`Q>wnBk;?(A-|VHeukeqS<k=H7Gv
zbMCqSIcH{{Km6p=0Om4}f*_(E#5m##5?G_)1F$)g3S>Cqk`lvfy;z6!;?mEtL4hjP
zd{n_kT*k3Ufe)8+d`y8Ko5khhV&Nw|xI)~o6p&X5>1vK^6imY9UVIYQdhsb-C%~`g
zxIw{GL^*C$Fdb0^C*dY9ZpJO*@@bA+#mHy8_$+So;By{q@nEY5w~Nuwd$A37cyT9g
z;<!t}$+%lQ(8xAy_u?Mht6&H2;}}pd5BDp001tZbkOw<GcvuWQqTmZ+%{DwL9v>^m
z<9Nb@FDlrDG{<fQ&BCK*?D1ew+@BP>H+e85G`_^~6vxvHWoJcV5oZB|TvvZFLrHTy
zY%+`wMq*|t740(ZcB9Lpk&>tpi80jFtq86%)*1oJi1h~A96J*0oi7$vv^hri>J}s6
z>T`UV4u)edoe0CYNOGZNbtDp|-E1TYth%lU$fXSB3ByjBizC9|1$99)oC+izBi0kQ
z!-1HU2)NUBz%tiorJ%V!(rs9g4Q9AiEVbts0lt{yD;&>oJWFRz$IbB^$5-ieIQDUT
zjl$3IbqX}cHyD)j)_0o;ClZg5<4RjRWp`&Cs&p+}lrod<Y?BCsiWY<z<`#Gwr7&8_
zJWtJ7uN6u5wKgwa(nNkX^=@GB9T`aqn2P7|O%<171w*AfABhK+UO3{Fif`cs72n2-
zDqg~OXlp9Ii|?uUK7PQ^RNzR$j(3~MWS}{2n?a+?wA$mV&Ddh2+lkx#4B+^oiXY*}
z4E~%o5&vbT5jJhHy`Rvb=VlhAdU{OT43EzKRK?GPKR?F{#F3TBh6WYCz%Nx?f=(5`
z5~EeQ)fu*A*3x09_%(h*XQAS^cv;0O_#MLpS2yKEtibt3vaii_a#3){7TUJaAB-d&
z6~D(H=<HOyD)#;wUT2tGuy@Bc&19gRt}5QZn;d^s@h9Pu6rw|uL!mbJ+De{>Lal8o
z{*1SX6Dt0K7l|PZQwtz1;`?YURs2<K{%=D485M8i@5Ce(|G>)(Cl##Cy3?O9!{?d_
zGZr>u-Tf;5iFb&3D)tM{Jl*kVBA$wc8A@ZxRM#p>3iqBcl1UW@@Gp*oDh`R`KP(=G
zRR+drOB<(gCaFvo2UhEbF)u|Ir6QI%PgcT8Nq?Hk%G@|i+o&=wR*V}<?lIiFuxpjs
zEe@hc>NA{Lu+MC69wSQ`8VXj89#)qTTOFkGk}`VDRw^nCbB?0D#5ST)!x1Hb$d&4L
zQnool?fQa6xm<8lfNF+4qI<%>*QFSZl2dSyYToF8%hXH;xlCQ^n6^PFcJ!SV^POQ<
zfg_o?<Y_N6E!uLNigCFWH^MEV(lD$nQc{bYSN(e`stjHyo>#xC>go&YEcFs|P0FyS
zZjPz%8T!nxr}|zcHdA=K0fIZqzT1l|n9ndJnMx$$wv)_@%LoI=0cvJsZgTIj_%ZM3
zR*Ru#xkAQIEbui)tk(8LBiv%(J-I5ox>zsE7G`Q;W;nkn7i(dK;mMgCu{Y}9nCR!V
z=*lcbsG($vu-mvjHkEqO?x_k=JC9P~NhYia(R*eQO4S$Pl*^7_d|j@)tG%W(;ugca
zLbLzDb|}ssN3<%+Q|e7Q!nw&MvXACVjYPtt9u~+N7@a#Nodj}ZBt2`!Am`GSba<bU
z44LaG4ka=7HkR~RR+AT&=&IB!iqXk!7jD}n#KHzOI!!oh4J$=kReB<J(rNCaf_)Uw
z(Mn06vJob|l6CZo<aUpkL+@#n0ktu6T`?Ln1dEU{5Avw7D~3i69OsN_Xc05n+?*w+
zDLk`Hi%L~fvFK#1=Hcc_8^H<^we?Yohm0siMGl(@%DiGIZV;DJH>EsUPGO+M(+e{6
zNF^#(wi%7D70|u$wPX5STYu6qDG%g$icY^eQ|TyaTIprip?orpDA^c!Im;WSQ%ndY
zLMLughi7OkmP7@`GuxjDmPnsTSz@4zWyLU!K171_*#nf&KfQ5+;X;z8(4Ai4P&#Wj
z*kdFxw9>WA9jrk)F2XXBg{ln+rF5U{mx2xQQ;_$dq-B;oCp5cp4@w76s+T-Qsy<X>
zA^}J)(DC$k2zr*G(U;RCnmvx@r_#t2dYXndn$;i;(?AJFJ4Xixy=-@Y-YJ$-dlD~!
zS^LuoQn*?^D-BNno}KiN@9qpzr<Yz*BX`T%Xs43^`8iT8Z(*U?gHRZ@Kq0})5E_Eo
zxf>P!aeFXcFByWbvtc(T=%qvOcWR}(QK^?{WocA(>U^KPS>tI``)ksu)jgW0WX>K;
zBq%Ye>0G1blR7oN8<PidoCLjGD<8xZ23?t@DQO(9dxtQUtWDF^oKP`@=`?bJJ}xJW
z*Su-W(0!V35bqP7t`O3R40~~sK0%w1#>_#~5vX7D`|D}{TQQv^5s9s+C8@!cDoC15
z(iTkAD#WVB+`>ULNmxEGe78K$rzwL75EkKPtt}CfYwcv+U+pX1j+!fcOdF^9(>P@i
zbHq|iCCJS4sS>urr;X2_9^TcsPikw_yn~oaQE+`eP50+ESjqG%&7ZMbtyO33hJ@tW
zou}7m6yekLTCFyXGyG?|yQt7AwVI6m%&_im)0cUe@9J=^Mz0zeenhL<has&hjkD4?
zTdyW(37b6KLh*6s1+)uI6{8{)T4lx=GMjZ~laR8`xL%wifF=rCE~G|_CuSjM5Tup|
zPjVKXBo}^ken#Fp;R<17bxF#c%BIKhe*eNW7HQ*-4AJb75XpKCsSTl-sB^9e)_MNL
zbV*|gEgTqrX#`{Z{{cfC#Q8*#(W80~epLK#7)#T*VBp{eQJCC%!WOwzWhND(I;t%7
zR8MB17PG06G~z^@f&k_~$LTl^XHe-nld6G^E}VrZ&W3{pxD4muN-V?;ScKc?7md4c
zF8wUB82hk<zJY^yooeF&`W_#TR_4btR*g0`7wt?(2Rj=Vvld*!R-%(d=;PXg!4l|V
z8_>-*!(`W@hh2|ewiA8q2}IaltYXi@Vy_^|UPYX}fi>(c`Z41GQc@Y#Nj|KXCgCdS
zOk6EB;~J?2*GeX?lj68u>c@@J&A3^*9k)n3aI17bwn>lT4(UbQDZPffq&IQ5^ftE3
zmAFT4#J%##*dd>R`{d;qkk{aT*}((yMm#9rj)&x3*h#heVR<hem7k@bc&O&G(=hcA
zYB^SNeDE+%;P?>7hdDlS2s7Op+&)Z!n2@Q(8LbhO%shv|(Z$it5qAGbrau$(VY$>D
z?%0p21CS0;wWaZcXyWKOfXOs8Ob1u<-(nL;uWN#=^kog5>WUF<KV*V+O>`WDhh`6p
uMnY<I?RW<b`^Bt}#t&f5F@#~_6s;BwsBqJQf~$y#CDe38e_ZX7(s~!XNs=T0

literal 0
HcmV?d00001

diff --git a/shapes_dataset/parse_all_questions.py b/shapes_dataset/parse_all_questions.py
new file mode 100644
index 0000000..5346715
--- /dev/null
+++ b/shapes_dataset/parse_all_questions.py
@@ -0,0 +1,59 @@
+import collections
+import os
+import json
+import sys
+import pdb
+
+
+# Script: parse every question of an annotation JSON file with the Java
+# ParseQuestions class and save the per-question parses as JSON.
+# Usage: python parse_all_questions.py <anno_json_file> <parsed_q_json_file>
+# NOTE: relative paths below assume the working directory is shapes_dataset/.
+if __name__=='__main__':
+    anno_json_file = sys.argv[1]
+    parsed_q_json_file = sys.argv[2]
+
+    with open(anno_json_file, 'r') as file:
+        anno_data = json.load(file)
+
+    # Write the questions and their ids to two line-aligned text files.
+    # Questions are written unchanged; '?' is removed from the parses below.
+    # Context managers close both files even if an item is malformed.
+    with open('questions.txt', 'w') as q_txt_file, \
+            open('question_ids.txt', 'w') as q_id_file:
+        for item in anno_data:
+            q_txt_file.write(item['question'] + '\n')
+            q_id_file.write(str(item['question_id']) + '\n')
+    os.system('wc -l questions.txt')
+
+    # Run the parser from its own directory so that the classpath resolves;
+    # its stdout is redirected into parsed_questions.txt.
+    os.chdir('../question_parser')
+    os.system(
+        "java -mx1000m -cp '.:./stanford-parser-full-2015-12-09/*' "
+        "ParseQuestions ./../shapes_dataset/questions.txt > "
+        "./../shapes_dataset/parsed_questions.txt")
+    os.chdir('../shapes_dataset')
+
+    with open('parsed_questions.txt', 'r') as parsed_q_file:
+        parsed_questions = parsed_q_file.readlines()
+    with open('question_ids.txt', 'r') as parsed_q_id_file:
+        question_ids = parsed_q_id_file.readlines()
+
+    parsed_q_json_data = []
+    for i, parsed_q in enumerate(parsed_questions):
+        # Trim two characters from each end of the line, drop '?', and split
+        # the remainder on '|' into the four bins.
+        splitted_line = parsed_q[2:-2].replace('?', '').split('|')
+        bins = [part.strip().split(' ') for part in splitted_line]
+        parsed_q_json_data.append({
+            # int() ignores the trailing newline, so no manual slicing.
+            'question_id': int(question_ids[i]),
+            'question_parse': {
+                'bin0': bins[0], 'bin1': bins[1],
+                'bin2': bins[2], 'bin3': bins[3],
+            }
+        })
+
+    with open(parsed_q_json_file, 'w') as file:
+        json.dump(parsed_q_json_data, file, indent=4)
-- 
GitLab