Commit 549f3417 authored by Christos Christodoulopoulos's avatar Christos Christodoulopoulos
Browse files

Added IAA calculator

Works only in gold mode
parent 5a32133f
No preview for this file type
package jubilee.agreement;
import jubilee.datastructure.JBCorpus;
import jubilee.datastructure.JBDataStructure;
import jubilee.treebank.TBTree;
import java.util.*;
/**
* Count:
* x annotated Y/N (stop here if N)
* x predicate sense
* x # of args
* x spans of arguments
* x labels of arguments
* - child vs adult
* - prep vs verb (sense vs label vs span)
* x spans (yields) not trees
*/
public class AgreementCalculator {
private static final String NULL = "null";
/**
 * Computes and prints the agreement between two annotated versions of a corpus.
 * <p>
 * Every annotation of {@code corpus1} that has an aligned counterpart in {@code corpus2}
 * contributes paired items: the predicate sense, the number of arguments and, for each
 * argument, its yield and its label. Each item is encoded as
 * {@code <type>-<speaker>-<category>:<value>}, where type is {@code p} (the annotation type
 * contains "-p") or {@code v}, and speaker is {@code c} (the context id's speaker part
 * contains "*CHI") or {@code a}.
 * <p>
 * The items are then tallied into global buckets and into one set of buckets per
 * type/speaker combination, and expected agreement, observed agreement and Cohen's kappa
 * are printed for every bucket.
 *
 * @param corpus1 the first annotator's corpus; drives the alignment
 * @param corpus2 the second annotator's corpus
 */
public static void corpusAgreement(JBCorpus corpus1, JBCorpus corpus2) {
    List<String> annotations1 = new ArrayList<String>();
    List<String> annotations2 = new ArrayList<String>();
    // Collect all the annotations that align wrt the predicate
    for (int annId = 0; annId < corpus1.getSize(); annId++) {
        JBDataStructure annotation1 = corpus1.getAnnotation(annId);
        JBDataStructure annotation2 = findAlignedAnnotation(corpus2, annotation1);
        if (annotation2 == null) {
            continue;
        }
        // Get the predicate sense (everything after the first '.' of the roleset)
        String sense1 = annotation1.getRoleset().substring(annotation1.getRoleset().indexOf('.') + 1);
        String sense2 = annotation2.getRoleset().substring(annotation2.getRoleset().indexOf('.') + 1);
        // Count the number of arguments annotated
        int argNo1 = annotation1.getPropbankTree().split("\\s+").length;
        int argNo2 = annotation2.getPropbankTree().split("\\s+").length;
        // Get the verb and prep annotations separately
        String prefix = (annotation1.getType().contains("-p")) ? "p-" : "v-";
        // Get the child and adult utterances separately. A context id without ':' used to
        // raise StringIndexOutOfBoundsException; the whole id is now inspected instead.
        String contextId = corpus1.getContext(annotation1.getIndexInContext());
        int colon = contextId.indexOf(':');
        String speakerPart = (colon < 0) ? contextId : contextId.substring(0, colon);
        prefix += speakerPart.contains("*CHI") ? "c" : "a";
        annotations1.add(prefix + "-sense:" + sense1);
        annotations2.add(prefix + "-sense:" + sense2);
        annotations1.add(prefix + "-#args:" + argNo1);
        annotations2.add(prefix + "-#args:" + argNo2);
        // Iterate over the arguments of whichever side has more of them, so that the
        // surplus arguments are paired with "null" on the other side.
        if (argNo1 >= argNo2) {
            addAnnotations(annotations1, annotations2, annotation1, annotation2, prefix);
        } else {
            addAnnotations(annotations2, annotations1, annotation2, annotation1, prefix);
        }
    }
    Map<String, AnnotationStructure> annotations = new HashMap<String, AnnotationStructure>();
    for (String type : Arrays.asList("p", "v")) {
        for (String speaker : Arrays.asList("c", "a")) {
            String suffix = " (" + type + "/" + speaker + ")";
            String groupPrefix = type + "-" + speaker + "-";
            registerBuckets(annotations, suffix);
            for (int i = 0; i < annotations1.size(); i++) {
                String ann1 = annotations1.get(i);
                // Only items of this type/speaker belong to this group. Previously the
                // per-group "Overall" bucket received every item of the whole corpus.
                if (!ann1.startsWith(groupPrefix)) {
                    continue;
                }
                tally(annotations, suffix, ann1, annotations2.get(i));
            }
        }
    }
    registerBuckets(annotations, "");
    for (int i = 0; i < annotations1.size(); i++) {
        tally(annotations, "", annotations1.get(i), annotations2.get(i));
    }
    System.out.println("Total annotations in corpus 1:\t" + corpus1.getSize());
    System.out.println("Total annotations in corpus 2:\t" + corpus2.getSize());
    System.out.println("Annotations missing from 2:\t" + missingAnnotations(corpus1, corpus2));
    System.out.println("Annotations missing from 1:\t" + missingAnnotations(corpus2, corpus1));
    for (String type : annotations.keySet()) {
        System.out.println(type);
        calculateKappa(annotations.get(type));
    }
}

/** Creates the six empty buckets whose names end with {@code suffix}. */
private static void registerBuckets(Map<String, AnnotationStructure> buckets, String suffix) {
    buckets.put("Overall" + suffix, new AnnotationStructure());
    buckets.put("Predicate Sense" + suffix, new AnnotationStructure());
    buckets.put("Arguments Overall" + suffix, new AnnotationStructure());
    buckets.put("Arguments Yield" + suffix, new AnnotationStructure());
    buckets.put("Arguments Labels" + suffix, new AnnotationStructure());
    buckets.put("Arguments Number" + suffix, new AnnotationStructure());
}

/** Adds one aligned pair to the "Overall" bucket and to the bucket(s) of its category. */
private static void tally(Map<String, AnnotationStructure> buckets, String suffix, String ann1, String ann2) {
    buckets.get("Overall" + suffix).add(ann1, ann2);
    String category = categoryOf(ann1);
    if ("sense".equals(category)) {
        buckets.get("Predicate Sense" + suffix).add(ann1, ann2);
    } else if ("#args".equals(category)) {
        buckets.get("Arguments Number" + suffix).add(ann1, ann2);
    } else if ("label".equals(category)) {
        buckets.get("Arguments Labels" + suffix).add(ann1, ann2);
        buckets.get("Arguments Overall" + suffix).add(ann1, ann2);
    } else if ("yield".equals(category)) {
        buckets.get("Arguments Yield" + suffix).add(ann1, ann2);
        buckets.get("Arguments Overall" + suffix).add(ann1, ann2);
    }
}

/**
 * Extracts the category tag of an encoded item, i.e. the text between the last '-' that
 * precedes the first ':' and that ':'. Looking only at the tag (rather than searching the
 * whole string) keeps value text such as a yield containing the word "sense" from being
 * counted in the wrong bucket.
 */
private static String categoryOf(String encoded) {
    int colon = encoded.indexOf(':');
    if (colon < 0) {
        return "";
    }
    return encoded.substring(encoded.lastIndexOf('-', colon) + 1, colon);
}
/**
 * Feeds the paired, index-encoded annotations of one bucket into a pairwise Cohen's kappa
 * calculator and prints the expected agreement, the observed agreement and the coefficient.
 *
 * @param annotations the bucket holding both annotators' encoded labels
 */
private static void calculateKappa(AnnotationStructure annotations) {
    final List<Integer> first = annotations.getAnnotations1();
    final List<Integer> second = annotations.getAnnotations2();
    final int labelCount = annotations.lexiconSize();
    final int itemCount = first.size();
    final AnnotatorAgreement agreement = new PairwiseCohensKappa(itemCount, labelCount);
    int item = 0;
    while (item < itemCount) {
        // Annotator 0 is the first list, annotator 1 the second.
        agreement.addAnnotation(0, item, first.get(item));
        agreement.addAnnotation(1, item, second.get(item));
        item++;
    }
    final String expectedLine = "\tExpected agreement:\t\t" + agreement.getExpectedAgreement();
    System.out.println(expectedLine);
    final String observedLine = "\tObserved agreement:\t\t" + agreement.getObservedAgreement();
    System.out.println(observedLine);
    final String kappaLine = "\tCohen's kappa:\t\t\t" + agreement.getAgreementCoefficient();
    System.out.println(kappaLine);
}
/**
 * Appends the yield/label items of every argument of {@code annotation1} to
 * {@code annotations1} and, in lock-step, the items of the argument of {@code annotation2}
 * that starts with the same leading index to {@code annotations2}. When no such argument
 * exists, the second list receives the "null" placeholders instead.
 *
 * @param annotations1 item list receiving the arguments of {@code annotation1}
 * @param annotations2 item list receiving the matched arguments of {@code annotation2}
 * @param annotation1  annotation whose arguments drive the iteration
 * @param annotation2  annotation searched for matching arguments
 * @param prefix       type/speaker prefix prepended to every item
 */
private static void addAnnotations(List<String> annotations1, List<String> annotations2,
        JBDataStructure annotation1, JBDataStructure annotation2, String prefix) {
    final String[] drivingArgs = annotation1.getPropbankTree().split("\\s+");
    final String[] candidateArgs = annotation2.getPropbankTree().split("\\s+");
    for (int argPos = 0; argPos < drivingArgs.length; argPos++) {
        // Check for and split any special functions
        final String[] pieces = drivingArgs[argPos].split("&|,|\\*");
        for (int piecePos = 0; piecePos < pieces.length; piecePos++) {
            final String piece = pieces[piecePos];
            addAnnotation(annotation1, piece, annotations1, prefix);
            final int leadingIndex = Integer.parseInt(piece.split(":")[0]);
            final String counterpart = getMatchingArg(candidateArgs, leadingIndex);
            addAnnotation(annotation2, counterpart, annotations2, prefix);
        }
    }
}
/**
 * Appends one yield item and one label item for a single argument to {@code annotations}.
 * <p>
 * A {@code null} argument string produces the "null" placeholder for both items. Otherwise
 * the two leading numbers of the string (separated by ':', the second one ending at '-' or
 * ':') are parsed and passed to {@code moveTo} on the annotation's tree, and the words and
 * argument label reported by the tree at that position are recorded.
 *
 * @param annotation  annotation owning the tree to read from
 * @param annString   a single argument string, or {@code null} when there is no argument
 * @param annotations item list to append to
 * @param prefix      type/speaker prefix prepended to every item
 */
private static void addAnnotation(JBDataStructure annotation, String annString, List<String> annotations,
        String prefix) {
    final String yieldTag = prefix + "-yield:";
    final String labelTag = prefix + "-label:";
    if (annString == null) {
        annotations.add(yieldTag + NULL);
        annotations.add(labelTag + NULL);
        return;
    }
    final int index = Integer.parseInt(annString.split(":")[0]);
    final int height = Integer.parseInt(annString.split(":|-")[1]);
    final TBTree tree = annotation.getTbTree();
    tree.moveTo(index, height);
    final String words = tree.getWords();
    final String argLabel = tree.getArg();
    annotations.add(yieldTag + words);
    annotations.add(labelTag + argLabel);
}
/**
 * Searches the given argument strings for a piece whose leading number (the text before
 * the first ':') equals {@code index}.
 *
 * @param args2 argument strings; each may bundle several pieces joined by '&', ',' or '*'
 * @param index the leading number to look for
 * @return the first matching piece, or {@code null} when none matches
 */
private static String getMatchingArg(String[] args2, int index) {
    for (int argPos = 0; argPos < args2.length; argPos++) {
        // Check for and split any special functions
        final String[] pieces = args2[argPos].split("&|,|\\*");
        for (int piecePos = 0; piecePos < pieces.length; piecePos++) {
            final String piece = pieces[piecePos];
            final String leading = piece.split(":")[0];
            if (Integer.parseInt(leading) == index) {
                return piece;
            }
        }
    }
    return null;
}
/**
 * Counts the annotations of {@code corpus1} for which no aligned annotation can be found
 * in {@code corpus2}.
 *
 * @param corpus1 corpus whose annotations are checked
 * @param corpus2 corpus searched for counterparts
 * @return the number of annotations of {@code corpus1} without a counterpart
 */
private static int missingAnnotations(JBCorpus corpus1, JBCorpus corpus2) {
    int unmatched = 0;
    int annId = 0;
    while (annId < corpus1.getSize()) {
        final JBDataStructure candidate = corpus1.getAnnotation(annId);
        if (findAlignedAnnotation(corpus2, candidate) == null) {
            unmatched += 1;
        }
        annId++;
    }
    return unmatched;
}
/**
 * Looks up, among the annotations that {@code otherCorpus} holds for the same context, the
 * first one with exactly the same predicate index and the same type as {@code annotation}.
 *
 * @param otherCorpus corpus to search
 * @param annotation  annotation to align
 * @return the aligned annotation, or {@code null} when the context has no annotations in
 *         {@code otherCorpus} or none of them matches
 */
private static JBDataStructure findAlignedAnnotation(JBCorpus otherCorpus, JBDataStructure annotation) {
    final List<JBDataStructure> candidates =
            otherCorpus.getAnnotationsFromContext(annotation.getIndexInContext());
    if (candidates != null) {
        for (JBDataStructure candidate : candidates) {
            // DON'T allow any difference in indices due to the insertion of empty nodes
            // (and other noise from the CHAT version of the data)
            final int offset = candidate.getPredicateIndex() - annotation.getPredicateIndex();
            if (offset != 0) {
                continue;
            }
            if (candidate.getType().equals(annotation.getType())) {
                return candidate;
            }
        }
    }
    return null;
}
}
package jubilee.agreement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * A simple indexer to store the values of the individual labels used by the annotators.
 * <p>
 * Labels are mapped to consecutive integers in order of first appearance, and the encoded
 * labels of the two annotators are kept in two parallel lists.
 */
public class AnnotationStructure {
    private Map<String, Integer> labelToIndex = new HashMap<String, Integer>();
    private List<Integer> annotations1 = new ArrayList<Integer>();
    private List<Integer> annotations2 = new ArrayList<Integer>();

    /**
     * Returns the index of {@code label}, assigning the next free index if it is new.
     *
     * @param label the label to encode
     * @return the index associated with the label
     */
    public int encode(String label) {
        Integer index = labelToIndex.get(label);
        if (index == null) {
            index = labelToIndex.size();
            labelToIndex.put(label, index);
        }
        return index;
    }

    /**
     * @return the number of distinct labels encoded so far
     */
    public int lexiconSize() {
        return labelToIndex.size();
    }

    /**
     * Records one aligned pair of annotations. The hyphen-separated prefix in front of the
     * {@code category:value} part is removed from both strings before they are encoded.
     *
     * @param annotation1 the first annotator's item
     * @param annotation2 the second annotator's item
     */
    public void add(String annotation1, String annotation2) {
        annotations1.add(encode(stripPrefix(annotation1)));
        annotations2.add(encode(stripPrefix(annotation2)));
    }

    /**
     * @return the first annotator's encoded labels (the live internal list)
     */
    public List<Integer> getAnnotations1() {
        return annotations1;
    }

    /**
     * @return the second annotator's encoded labels (the live internal list)
     */
    public List<Integer> getAnnotations2() {
        return annotations2;
    }

    /**
     * Drops everything up to and including the last '-' that precedes the first ':'.
     * <p>
     * Searching for the last '-' of the whole string would also cut into values that
     * contain hyphens, so that e.g. "label:ARG1-PRD" and "label:ARG2-PRD" would both be
     * reduced to "PRD" and wrongly counted as agreement. Strings without ':' keep the old
     * behaviour (cut after the last '-').
     */
    private static String stripPrefix(String annotation) {
        int colon = annotation.indexOf(':');
        int dash = (colon < 0) ? annotation.lastIndexOf('-') : annotation.lastIndexOf('-', colon);
        return annotation.substring(dash + 1);
    }
}
package jubilee.agreement;
/**
* This abstract class and its descendants are based on the survey article
* "Inter-Coder Agreement for Computational Linguistics" by Ron Artstein and Massimo Poessio in
* Computational Lingusitics, Volume 34, Number 4.
*
* @author Vivek Srikumar
*/
public abstract class AnnotatorAgreement {
protected final int numAnnotators;
protected final int numItems;
protected final int numLabels;
protected int[][] nik, nck;
protected int[] nk;
protected int[][] annotation;
public AnnotatorAgreement(int numAnnotators, int numItems, int numLabels) {
this.numAnnotators = numAnnotators;
this.numItems = numItems;
this.numLabels = numLabels;
this.nik = new int[numItems][numLabels];
this.nck = new int[numAnnotators][numLabels];
this.nk = new int[numLabels];
this.annotation = new int[this.numItems][this.numAnnotators];
}
public void addAnnotation(int annotatorId, int item, int label) {
nik[item][label]++;
nck[annotatorId][label]++;
nk[label]++;
annotation[item][annotatorId] = label;
}
public abstract double getObservedAgreement();
public abstract double getExpectedAgreement();
public double getObservedDisagreement() {
return 1 - getObservedAgreement();
}
public double getAgreementCoefficient() {
return (this.getObservedAgreement() - this.getExpectedAgreement())
/ (1 - this.getExpectedAgreement());
}
}
package jubilee.agreement;
/**
 * Base class for agreement coefficients that are defined for exactly two annotators.
 *
 * @author Vivek Srikumar
 */
public abstract class PairwiseAgreement extends AnnotatorAgreement {

    /**
     * Creates a measure for two annotators.
     *
     * @param numItems  number of annotated items
     * @param numLabels number of distinct labels
     */
    public PairwiseAgreement(int numItems, int numLabels) {
        this(2, numItems, numLabels);
    }

    /**
     * @param numAnnotators must be two
     * @param numItems      number of annotated items
     * @param numLabels     number of distinct labels
     * @throws IllegalArgumentException if {@code numAnnotators} is not two
     */
    public PairwiseAgreement(int numAnnotators, int numItems, int numLabels) {
        super(numAnnotators, numItems, numLabels);
        final boolean exactlyTwo = (numAnnotators == 2);
        if (!exactlyTwo) {
            throw new IllegalArgumentException(
                    "Number of annotators is not two for an object of class " + getClass().toString());
        }
    }

    /**
     * @return the fraction of items on which both annotators recorded the same label
     */
    @Override
    public double getObservedAgreement() {
        int matches = 0;
        for (int item = 0; item < numItems; item++) {
            final int[] pair = annotation[item];
            if (pair[0] == pair[1]) {
                matches++;
            }
        }
        return (double) matches / numItems;
    }

    /**
     * Prints a label-by-label table to standard output: the row is the first annotator's
     * label, the column the second annotator's label, and each cell is followed by a tab.
     */
    public void printAgreementMatrix() {
        final int[][] table = new int[numLabels][numLabels];
        for (int item = 0; item < numItems; item++) {
            final int[] pair = annotation[item];
            table[pair[0]][pair[1]] += 1;
        }
        for (int row = 0; row < numLabels; row++) {
            final StringBuilder line = new StringBuilder();
            for (int col = 0; col < numLabels; col++) {
                line.append(table[row][col]).append("\t");
            }
            System.out.println(line.toString());
        }
    }
}
\ No newline at end of file
package jubilee.agreement;
/**
 * The S coefficient, first described in (Bennett, Alpert and Goldstein, 1954). Its expected
 * agreement is computed by assuming that each label is equally likely.
 * <p>
 * <b>Note:</b> The coefficient S is problematic in many respects. Its value can be
 * artificially increased by adding spurious categories. For more discussion, refer to
 * (Artstein and Poesio, 2008).
 *
 * @author Vivek Srikumar
 */
public class PairwiseBennettS extends PairwiseAgreement {

    /**
     * @param numItems  number of annotated items
     * @param numLabels number of distinct labels
     */
    public PairwiseBennettS(int numItems, int numLabels) {
        super(numItems, numLabels);
    }

    /**
     * @return one divided by the number of labels
     */
    @Override
    public double getExpectedAgreement() {
        final double labelCount = numLabels;
        return 1.0 / labelCount;
    }
}
package jubilee.agreement;
/**
 * Cohen's kappa for two annotators. The expected agreement is derived from each
 * annotator's own label distribution.
 *
 * @author Vivek Srikumar
 */
public class PairwiseCohensKappa extends PairwiseAgreement {

    /**
     * @param numItems  number of annotated items
     * @param numLabels number of distinct labels
     */
    public PairwiseCohensKappa(int numItems, int numLabels) {
        super(numItems, numLabels);
    }

    /**
     * Sums, over all labels, the product of the two annotators' counts for that label and
     * divides by the squared number of items.
     * <p>
     * All products are computed in {@code double}: as {@code int} multiplications both the
     * per-label product and {@code numItems * numItems} overflow silently once the number
     * of items exceeds 46340, which corrupts the result.
     *
     * @return the chance agreement under each annotator's individual label distribution
     */
    @Override
    public double getExpectedAgreement() {
        double Ae = 0;
        for (int i = 0; i < numLabels; i++) {
            Ae += (double) this.nck[0][i] * this.nck[1][i];
        }
        Ae /= (double) this.numItems * this.numItems;
        return Ae;
    }
}
package jubilee.agreement;
/**
 * Scott's pi for two annotators. This assumes that all annotators have the same expected
 * distribution for random assignment of labels, estimated from the pooled label counts.
 *
 * @author Vivek Srikumar
 */
public class PairwiseScottsPi extends PairwiseAgreement {

    /**
     * @param numItems  number of annotated items
     * @param numLabels number of distinct labels
     */
    public PairwiseScottsPi(int numItems, int numLabels) {
        super(numItems, numLabels);
    }

    /**
     * Sums the squared pooled count of every label and divides by
     * {@code 4 * numItems * numItems}.
     * <p>
     * All products are computed in {@code double}: as {@code int} arithmetic the divisor
     * overflows silently once the number of items exceeds 23170, and the squared counts
     * overflow for counts above 46340.
     *
     * @return the chance agreement under a single label distribution shared by both annotators
     */
    @Override
    public double getExpectedAgreement() {
        double Ae = 0;
        for (int i = 0; i < this.numLabels; i++) {
            Ae += (double) this.nk[i] * this.nk[i];
        }
        Ae /= 4.0 * this.numItems * this.numItems;
        return Ae;
    }
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment