Commit 420178d6 authored by Christos Christodoulopoulos
Browse files

Gold mode working

parent 71ac4e16
package jubilee.datastructure;
import jubilee.util.DataManager;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
......@@ -43,6 +44,56 @@ public class JBCorpus {
readCorpus();
}
/**
 * A constructor used in "gold" mode. It uses a set of corpora to initialise the gold corpus.
 * This is needed in case of missing/added labels in any of the corpora.
 *
 * <p>The contexts are taken from the first corpus (all corpora must have the same number of
 * contexts, otherwise the program exits). For every context the annotations of all corpora are
 * merged; an annotation is added only if no annotation with the same predicate index and type
 * has been merged for that context already.
 *
 * @param corpora the annotators' corpora to merge; must contain at least one corpus
 * @throws IllegalArgumentException if {@code corpora} is {@code null} or empty
 */
public JBCorpus(JBCorpus[] corpora) {
    if (corpora == null || corpora.length == 0) {
        throw new IllegalArgumentException("At least one corpus is needed to initialise the gold corpus");
    }
    annotator = DataManager.GOLD_ID;

    // Make sure that the contexts match across corpora
    int contextLength = corpora[0].contexts.size();
    for (JBCorpus corpus : corpora) {
        if (corpus.contexts.size() != contextLength) {
            System.err.println("Contexts don't match across corpora");
            System.exit(-1);
        }
    }

    // Now just take the contexts from the first corpus
    contexts = corpora[0].contexts;
    currentAnnotationIndex = 0;
    currentAnnotationOriginal = new JBDataStructure();

    // Aggregate the annotations across corpora
    // NOTE(review): the merged annotations are the annotators' own objects, not copies, so a
    // change made through the gold corpus is visible in the source corpus too — confirm intended.
    contextToAnnotation = new HashMap<Integer, List<JBDataStructure>>();
    annotations = new ArrayList<JBDataStructure>();
    for (int i = 0; i < contexts.size(); i++) {
        for (JBCorpus corpus : corpora) {
            List<JBDataStructure> otherList = corpus.contextToAnnotation.get(i);
            if (otherList == null) continue;

            // The running list must come from THIS (gold) corpus' map. Reading it from the
            // annotator's map would make the merge target the very list being merged, so
            // nothing new would ever be added and the gold entry would be replaced by an
            // alias of that annotator's list.
            List<JBDataStructure> mergedList = contextToAnnotation.get(i);
            if (mergedList == null) {
                mergedList = new ArrayList<JBDataStructure>();
                contextToAnnotation.put(i, mergedList);
            }
            for (JBDataStructure otherAnn : otherList) {
                if (!existsInList(mergedList, otherAnn)) {
                    mergedList.add(otherAnn);
                    annotations.add(otherAnn);
                }
            }
        }
    }
}
/**
 * Tells whether the given list already holds an annotation equivalent to {@code otherAnnotation}.
 * Two annotations count as equivalent when they have the same predicate index and the same type.
 *
 * @param annotationList the annotations to search
 * @param otherAnnotation the annotation to look for
 * @return {@code true} if an equivalent annotation is present, {@code false} otherwise
 */
private boolean existsInList(List<JBDataStructure> annotationList, JBDataStructure otherAnnotation) {
    for (int pos = 0; pos < annotationList.size(); pos++) {
        JBDataStructure candidate = annotationList.get(pos);
        if (candidate.getPredicateIndex() != otherAnnotation.getPredicateIndex()) {
            continue;
        }
        if (candidate.getType().equals(otherAnnotation.getType())) {
            return true;
        }
    }
    return false;
}
public void readCorpus() throws Exception {
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(corpusFile)));
String fileString = "";
......@@ -133,8 +184,12 @@ public class JBCorpus {
}
}
public void copyCurrent(JBCorpus otherCorpus) {
getCurrentAnnotation().copyFrom(otherCorpus.getCurrentAnnotation(), false);
public void copyCurrent(JBDataStructure otherAnnotation) {
JBDataStructure newAnnotation = new JBDataStructure();
contextToAnnotation.get(otherAnnotation.getIndexInContext()).remove(getCurrentAnnotation());
newAnnotation.copyFrom(otherAnnotation, false);
annotations.set(currentAnnotationIndex, newAnnotation);
contextToAnnotation.get(newAnnotation.getIndexInContext()).add(newAnnotation);
}
public void revertChanges() throws Exception {
......
......@@ -29,6 +29,9 @@ public class JBDataStructure {
/**
 * Index of the context this annotation belongs to; JBCorpus uses it as the key when looking up
 * the annotations of a context. It is -1 for an annotation created with the no-argument constructor.
 */
private int indexInContext;
/** Terminal index of the predicate, i.e. of the argument labelled REL */
private int predicateIndex;
/** Indicates that some part of the structure has changed (to prompt for a save dialogue) */
private boolean changed;
......@@ -41,6 +44,7 @@ public class JBDataStructure {
/**
 * Creates an empty annotation: not attached to any context (index -1), not marked as changed,
 * and with an empty annotator name.
 */
public JBDataStructure() {
    annotator = "";
    changed = false;
    indexInContext = -1;
}
public JBDataStructure(String treeString, String pbInstanceString, int indexInContext) throws Exception {
......@@ -137,6 +141,8 @@ public class JBDataStructure {
tree.moveTo(terminalIdx, height);
loc += symbol + terminalIdx + ":" + height;
if (arg.equals(REL))
predicateIndex = terminalIdx;
tree.setArg(loc, arg);
if (tok_description.hasMoreTokens()) // get symbol
......@@ -209,6 +215,7 @@ public class JBDataStructure {
public void copyFrom(JBDataStructure other, boolean copyAnnotator) {
this.indexInContext = other.indexInContext;
this.predicateIndex = other.predicateIndex;
this.tbTree = other.tbTree.copy();
this.type = other.type;
this.roleset = other.roleset;
......@@ -237,6 +244,9 @@ public class JBDataStructure {
/**
 * @return the index of the context this annotation belongs to (-1 if none has been assigned)
 */
public int getIndexInContext() {
    return this.indexInContext;
}
/**
 * @return the terminal index of this annotation's predicate
 */
public int getPredicateIndex() {
    return this.predicateIndex;
}
/**
 * @return the type of this annotation
 */
public String getType() {
    return this.type;
}
......
......@@ -186,6 +186,11 @@ public class JBOpenDialog extends JDialog implements ActionListener, ItemListene
vec.add(task);
if (id.equalsIgnoreCase(jbtk.getUserID())) {
if (jbtk.isGold()) {
// Add the number of existing annotations for each task
int annotations = countAnnotations(task, tmp2);
if (annotations > 0) task += " (" + annotations + ")";
}
lm_newTask.removeElement(task);
lm_myTask.addElement(anAnnlist);
} else if (!jbtk.isGold()) {
......
......@@ -39,12 +39,13 @@ import javax.swing.text.Highlighter;
import java.awt.*;
import java.awt.event.*;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.*;
import java.util.List;
@SuppressWarnings("serial")
public class JBToolkit extends JFrame implements ActionListener, ItemListener, ListSelectionListener {
public final static String EDITED = "[EDITED] ";
public final static String GOLD_MISSING = "MISSING";
private String str_frameTitle;
private String str_userID;
// dataset[0] = resource, dataset[1] = directory-paths
......@@ -95,7 +96,7 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
addWindowListener(new WindowAdapter() {
@Override
public void windowClosing(WindowEvent e) {
if (!title.startsWith(EDITED)) {
if (!getTitle().startsWith(EDITED)) {
setVisible(false);
showOpenDialog();
}
......@@ -211,7 +212,7 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
ls_gold.setFont(new Font("Courier", Font.PLAIN, 12));
ls_gold.addListSelectionListener(this);
ls_gold.setVisibleRowCount(3);
ls_gold.setVisibleRowCount(4);
ls_gold.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
top.add(new JScrollPane(ls_gold), BorderLayout.SOUTH);
}
......@@ -273,25 +274,23 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
setTitle(str_frameTitle + " - " + filename[0]);
if (isNewTask) {
if (isGold()) {
String taskFile = str_dataset.get(DataManager.ANNOTATION) + File.separator + filename[1];
corpus = new JBCorpus(taskFile, DataManager.GOLD_ID);
}
else {
String taskFile = str_dataset.get(DataManager.TASK) + File.separator + filename[0];
corpus = new JBCorpus(taskFile, str_userID);
}
}
else
corpus = new JBCorpus(str_annFile, str_userID);
String taskFile = str_dataset.get(DataManager.TASK) + File.separator + filename[0];
corpus = new JBCorpus(taskFile, str_userID);
}
else
corpus = new JBCorpus(str_annFile, str_userID);
if (isGold()) {
if (isGold()) {
moreCorpora = new JBCorpus[filename.length-1];
for (int i=1; i<filename.length; i++) {
for (int i=1; i<filename.length; i++) {
filename[i] = str_dataset.get(DataManager.ANNOTATION) + File.separator + filename[i];
moreCorpora[i-1] = new JBCorpus(filename[i], DataManager.GOLD_ID);
}
}
String annotator = filename[i].substring(filename[i].lastIndexOf('.')+1);
moreCorpora[i-1] = new JBCorpus(filename[i], annotator);
}
if (isNewTask) {
corpus = new JBCorpus(moreCorpora);
}
}
framesetPanel.setCorpus(corpus);
......@@ -339,9 +338,16 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
}
/**
 * Accepts one annotator's annotation as gold when its row is selected in the gold list.
 * Row 0 (the gold annotation itself) and rows marked {@code GOLD_MISSING} are ignored.
 * Otherwise the annotator's annotation aligned with the current one is copied into the gold
 * corpus, marked with the gold annotator id and flagged as changed, and the views are refreshed.
 *
 * @param e the selection event; only events coming from the gold list are handled
 */
public void valueChanged(ListSelectionEvent e) {
    // Used to accept one of the annotations as gold
    if (e.getSource() != ls_gold) {
        return;
    }
    int selectedRow = ls_gold.getSelectedIndex();
    // Row 0 is the gold annotation itself; -1 means the selection was cleared
    if (selectedRow <= 0) {
        return;
    }
    if (GOLD_MISSING.equals(lm_gold.get(selectedRow))) {
        return;
    }

    // Rows 1..n correspond to moreCorpora[0..n-1]
    JBDataStructure alignedAnnotation = findAlignedAnnotation(moreCorpora[selectedRow - 1],
            corpus.getCurrentAnnotation());
    // Nothing to copy if this annotator has no matching annotation (e.g. the list is out of date)
    if (alignedAnnotation == null) {
        return;
    }

    corpus.copyCurrent(alignedAnnotation);
    corpus.getCurrentAnnotation().setAnnotator(DataManager.GOLD_ID);
    corpus.getCurrentAnnotation().hasChanged(true);
    updateGoldTopList();
    updateAll();
}
......@@ -394,30 +400,62 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
else bt_prev.setEnabled(true);
}
private void updateGoldList() {
if (isGold()) {
private void updateGoldList() {
if (isGold()) {
lm_gold.removeAllElements();
int maxLength = maxAnnotationLength();
JBDataStructure annotation = corpus.getCurrentAnnotation();
lm_gold.addElement(StringManager.addIndent(annotation.getAnnotator(), 12) + annotation.getPropbankTree());
lm_gold.addElement(StringManager.addIndent("GOLD", 12) +
StringManager.addIndent(annotation.getPropbankTree(), maxLength + 3) +
annotation.getTbTree().toTextTreeCompact());
for (JBCorpus otherCorpus : moreCorpora) {
otherCorpus.setCurrentAnnotationIndex(comboJump.getSelectedIndex());
JBDataStructure otherAnnotation = corpus.getCurrentAnnotation();
lm_gold.addElement(StringManager.addIndent(otherAnnotation.getAnnotator(), 12) +
otherAnnotation.getPropbankTree());
JBDataStructure otherAnnotation = findAlignedAnnotation(otherCorpus, annotation);
// Check for deleted annotations
if (otherAnnotation == null) {
lm_gold.addElement(GOLD_MISSING);
}
else {
lm_gold.addElement(StringManager.addIndent(otherAnnotation.getAnnotator(), 12) +
StringManager.addIndent(otherAnnotation.getPropbankTree(), maxLength + 3) +
otherAnnotation.getTbTree().toTextTreeCompact());
}
}
ls_gold.invalidate();
}
}
ls_gold.invalidate();
}
}
//TODO What does this do?
void updateGoldTopList() {
if (isGold()) {
/**
 * Looks up, in another corpus, the annotation that corresponds to the given one: it must be in
 * the same context and have the same predicate index and type.
 *
 * @param otherCorpus the corpus to search
 * @param annotation the annotation to align with
 * @return the matching annotation, or {@code null} if the corpus has no annotations for that
 *         context or none of them matches
 */
private JBDataStructure findAlignedAnnotation(JBCorpus otherCorpus, JBDataStructure annotation) {
    int contextIndex = annotation.getIndexInContext();
    List<JBDataStructure> candidates = otherCorpus.getAnnotationsFromContext(contextIndex);
    if (candidates != null) {
        for (JBDataStructure candidate : candidates) {
            boolean samePredicate = candidate.getPredicateIndex() == annotation.getPredicateIndex();
            if (samePredicate && candidate.getType().equals(annotation.getType())) {
                return candidate;
            }
        }
    }
    return null;
}
/**
 * Computes the length of the longest PropBank string among the current gold annotation and the
 * annotations aligned with it in {@code moreCorpora}. Annotators without an aligned annotation
 * are skipped.
 *
 * @return the maximum PropBank string length
 */
private int maxAnnotationLength() {
    JBDataStructure current = corpus.getCurrentAnnotation();
    // The gold row is padded with this width as well, so its own string has to count;
    // otherwise a gold string longer than every annotator's would exceed the computed width.
    // NOTE(review): assumes StringManager.addIndent pads to the given width — confirm.
    int max = current.getPropbankTree().length();
    for (JBCorpus otherCorpus : moreCorpora) {
        JBDataStructure otherAnnotation = findAlignedAnnotation(otherCorpus, current);
        if (otherAnnotation == null) continue;
        max = Math.max(max, otherAnnotation.getPropbankTree().length());
    }
    return max;
}
void updateGoldTopList() {
if (isGold()) {
int maxLength = maxAnnotationLength();
JBDataStructure annotation = corpus.getCurrentAnnotation();
lm_gold.setElementAt(StringManager.addIndent(annotation.getAnnotator(), 12)
+ annotation.getPropbankTree(), 0);
ls_gold.invalidate();
}
}
lm_gold.setElementAt(StringManager.addIndent("GOLD", 12) +
StringManager.addIndent(annotation.getPropbankTree(), maxLength + 3) +
annotation.getTbTree().toTextTreeCompact(), 0);
ls_gold.invalidate();
}
}
private void actionButtonTreeEdit() {
new JBTreeEditPanel(this, corpus.getCurrentAnnotation());
......@@ -549,6 +587,8 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
// ------------------ Menu-Treebank Action ------------------
private void actionBtPrev() {
if (isGold())
corpus.getCurrentAnnotation().setAnnotator(DataManager.GOLD_ID);
for (int i=corpus.getCurrentAnnotationIndex(); i>0; i--) {
corpus.setCurrentAnnotationIndex(i - 1);
if (!isGold() || !isGoldSame()) break;
......@@ -557,6 +597,8 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
}
private void actionBtNext() {
if (isGold())
corpus.getCurrentAnnotation().setAnnotator(DataManager.GOLD_ID);
for (int i=corpus.getCurrentAnnotationIndex(); i<corpus.getSize()-1; i++) {
corpus.setCurrentAnnotationIndex(i + 1);
if (!isGold() || !isGoldSame()) break;
......@@ -566,16 +608,18 @@ public class JBToolkit extends JFrame implements ActionListener, ItemListener, L
private boolean isGoldSame() {
String tmpPBT = corpus.getCurrentAnnotation().getTbTree().toPropbank();
String tmpT = corpus.getCurrentAnnotation().getTbTree().toTextTreeCompact();
String tmpRS = corpus.getCurrentAnnotation().getRoleset();
for (JBCorpus aPb_more : moreCorpora) {
aPb_more.setCurrentAnnotationIndex(corpus.getCurrentAnnotationIndex());
if (!tmpPBT.equals(aPb_more.getCurrentAnnotation().getTbTree().toPropbank())
|| !tmpRS.equals(aPb_more.getCurrentAnnotation().getRoleset()))
for (JBCorpus otherCorpus : moreCorpora) {
JBDataStructure otherAnnotation = findAlignedAnnotation(otherCorpus, corpus.getCurrentAnnotation());
if (otherAnnotation == null) return false;
if (!tmpPBT.equals(otherAnnotation.getTbTree().toPropbank())
|| !tmpRS.equals(otherAnnotation.getRoleset())
|| !tmpT.equals(otherAnnotation.getTbTree().toTextTreeCompact()))
return false;
}
corpus.getCurrentAnnotation().setAnnotator(DataManager.GOLD_ID);
return true;
}
......
......@@ -31,7 +31,7 @@ import java.io.File;
* @since 5/11/2014
*/
public class Jubilee {
public static final String VERSION = "3.25";
public static final String VERSION = "3.26";
static public void main(String[] args) {
// Migrate the menu to the Mac OSX menu bar
if (System.getProperty("os.name").toLowerCase().contains("mac"))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment