Commit 58161599 authored by zpahuja2
parent a0085071

switch to MapDB from H2, workaround to fix bugs caused by changes in core utilities and other bug fixes
@@ -30,7 +30,7 @@ NombankHome = /shared/corpora/corporaWeb/treebanks/eng/nombank/
# The directory of the sentence and pre-extracted features database (~5G of space required)
# Not used during test/working with pre-trained models
-CacheDirectory = cache
+CacheDirectory = ./cache
ModelsDirectory = models
......
@@ -75,7 +75,7 @@
<dependency>
<groupId>com.gurobi</groupId>
<artifactId>gurobi</artifactId>
-<version>6.5</version>
+<version>7.0.1</version>
<optional>true</optional>
</dependency>
@@ -104,6 +104,12 @@
<version>1.4.190</version>
</dependency>
<dependency>
<groupId>org.mapdb</groupId>
<artifactId>mapdb</artifactId>
<version>3.0.4</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>inference</artifactId>
......
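For orientation, the org.mapdb artifact added above provides an embedded key-value store. A minimal sketch of the calls the rest of this commit relies on (the file and map names here are made up):

import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.mapdb.Serializer;
import java.util.concurrent.ConcurrentMap;

public class MapDBQuickstart {
    public static void main(String[] args) {
        // Open (or create) a file-backed store; it is closed automatically on JVM shutdown.
        DB db = DBMaker.fileDB("quickstart.db").closeOnJvmShutdown().make();
        // Named maps are created lazily and persisted in the same file.
        ConcurrentMap<Integer, byte[]> map =
                db.hashMap("demo", Serializer.INTEGER, Serializer.BYTE_ARRAY).createOrOpen();
        map.put(42, new byte[]{1, 2, 3});
        db.commit(); // persist the change
        db.close();
    }
}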
@@ -12,4 +12,4 @@ OPTIONS="$MEMORY -cp $CP "
MAINCLASS=edu.illinois.cs.cogcomp.srl.Main
#MAINCLASS=edu.illinois.cs.cogcomp.srl.SemanticRoleLabeler
-time nice java $OPTIONS $MAINCLASS "$@"
+time java $OPTIONS $MAINCLASS "$@"
@@ -6,6 +6,7 @@ import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.IResetableIterator;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
import edu.illinois.cs.cogcomp.core.experiments.ClassificationTester;
import edu.illinois.cs.cogcomp.core.io.IOUtils;
import edu.illinois.cs.cogcomp.core.stats.Counter;
@@ -14,6 +15,7 @@ import edu.illinois.cs.cogcomp.core.utilities.commands.CommandIgnore;
import edu.illinois.cs.cogcomp.core.utilities.commands.InteractiveShell;
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
import edu.illinois.cs.cogcomp.infer.ilp.ILPSolverFactory;
import edu.illinois.cs.cogcomp.ner.LbjTagger.Data;
import edu.illinois.cs.cogcomp.nlp.corpusreaders.NombankReader;
import edu.illinois.cs.cogcomp.nlp.corpusreaders.PropbankReader;
import edu.illinois.cs.cogcomp.sl.core.SLParameters;
@@ -23,7 +25,7 @@ import edu.illinois.cs.cogcomp.sl.learner.LearnerFactory;
import edu.illinois.cs.cogcomp.sl.util.IFeatureVector;
import edu.illinois.cs.cogcomp.sl.util.WeightVector;
import edu.illinois.cs.cogcomp.srl.caches.FeatureVectorCacheFile;
-import edu.illinois.cs.cogcomp.srl.caches.SentenceDBHandler;
+import edu.illinois.cs.cogcomp.srl.caches.SentenceMapDBHandler;
import edu.illinois.cs.cogcomp.srl.core.ModelInfo;
import edu.illinois.cs.cogcomp.srl.core.Models;
import edu.illinois.cs.cogcomp.srl.core.SRLManager;
@@ -44,6 +46,7 @@ import edu.illinois.cs.cogcomp.srl.verb.VerbSRLManager;
import edu.illinois.cs.cogcomp.core.experiments.evaluators.PredicateArgumentEvaluator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
@@ -57,7 +60,10 @@ public class Main {
private static SRLProperties properties;
private static String configFile;
/**
 * Entry point; dispatches the given command and its parameters via the interactive shell.
 *
 * @param arguments the command name followed by its parameters
 */
@CommandIgnore
public static void main(String[] arguments) {
@@ -96,6 +102,12 @@
}
}
/**
 * Performs the full training and testing sequence for one SRL type.
 *
 * @param srlType       Verb or Nom
 * @param cacheDatasets "true" to read and cache the datasets first
 * @throws Exception
 */
@CommandDescription(description = "Performs the full training & testing sequence for all SRL types",
usage = "expt [Verb | Nom] cacheDatasets=[true | false]")
public static void expt(String srlType, String cacheDatasets) throws Exception {
@@ -124,24 +136,37 @@
evaluate(srlType);
}
/**
 * Reads all configured datasets and caches them in the sentence database.
 *
 * @throws Exception
 */
@CommandDescription(description = "Reads and caches all the datasets", usage = "cacheDatasets")
public static void cacheDatasets() throws Exception {
log.info("Initializing datasets");
-SentenceDBHandler.instance.initializeDatasets(properties.getSentenceDBFile());
+SentenceMapDBHandler.instance.initializeDatasets(properties.getSentenceDBFile());
// Add Propbank data
log.info("Caching PropBank data");
cacheVerbNom(SRLType.Verb);
// NomBank not necessary for Verb SRL
/*
log.info("Caching NomBank data");
cacheVerbNom(SRLType.Nom);
*/
log.info("Cached all datasets");
log.info("Adding required views in PTB");
-addRequiredViews(SentenceDBHandler.instance.getDataset(Dataset.PTBAll));
+addRequiredViews(SentenceMapDBHandler.instance.getDataset(Dataset.PTBAll));
}
/**
 * Reads PropBank or NomBank and collects every annotated sentence into the
 * matching cached datasets.
 *
 * @param srlType Verb or Nom
 * @throws Exception
 */
private static void cacheVerbNom(SRLType srlType) throws Exception {
String treebankHome = properties.getPennTreebankHome();
String[] allSectionsArray = properties.getAllSections();
@@ -151,6 +176,13 @@
List<String> devSections = Arrays.asList(properties.getDevSections());
List<String> ptb0204Sections = Arrays.asList("02", "03", "04");
log.info("Sections {allSectionsArray= " + Arrays.toString(allSectionsArray)
        + ", trainSections= " + trainSections.toString()
        + ", testSections= " + testSections.toString()
        + ", trainDevSections= " + trainDevSections.toString()
        + ", devSections= " + devSections.toString()
        + ", ptb0204Sections= " + ptb0204Sections.toString() + "}");
String dataHome;
if (srlType == SRLType.Verb)
dataHome = properties.getPropbankHome();
@@ -164,30 +196,54 @@
else
data = new NombankReader(treebankHome, dataHome, allSectionsArray, goldView, true);
/* Map each dataset to the list of text annotations from all its sections,
   so we only commit to the DB once per dataset (a costly operation). */
Map<Dataset, ArrayList<TextAnnotation>> datasetToListOfAnnotations = new HashMap<>();
for (Dataset dataset : Dataset.values())
datasetToListOfAnnotations.put(dataset, new ArrayList<TextAnnotation>());
int count = 0;
while (data.hasNext()) {
TextAnnotation ta = data.next();
if (ta.hasView(goldView)) {
String id = ta.getId();
String section = id.substring(id.indexOf('/')+1, id.lastIndexOf('/'));
-SentenceDBHandler.instance.addTextAnnotation(Dataset.PTBAll, ta);
+datasetToListOfAnnotations.get(Dataset.PTBAll).add(ta);
+boolean taInAtLeastOneSection = false;
if (trainSections.contains(section))
-    SentenceDBHandler.instance.addTextAnnotation(Dataset.PTBTrain, ta);
+    taInAtLeastOneSection |= datasetToListOfAnnotations.get(Dataset.PTBTrain).add(ta);
if (devSections.contains(section))
-    SentenceDBHandler.instance.addTextAnnotation(Dataset.PTBDev, ta);
+    taInAtLeastOneSection |= datasetToListOfAnnotations.get(Dataset.PTBDev).add(ta);
if (trainDevSections.contains(section))
-    SentenceDBHandler.instance.addTextAnnotation(Dataset.PTBTrainDev, ta);
+    taInAtLeastOneSection |= datasetToListOfAnnotations.get(Dataset.PTBTrainDev).add(ta);
if (testSections.contains(section))
-    SentenceDBHandler.instance.addTextAnnotation(Dataset.PTBTest, ta);
+    taInAtLeastOneSection |= datasetToListOfAnnotations.get(Dataset.PTBTest).add(ta);
if (ptb0204Sections.contains(section))
-    SentenceDBHandler.instance.addTextAnnotation(Dataset.PTB0204, ta);
+    taInAtLeastOneSection |= datasetToListOfAnnotations.get(Dataset.PTB0204).add(ta);
+if (!taInAtLeastOneSection)
+    log.info("TextAnnotation with id: {} and section: {} not in any specified dataset", id, section);
}
count++;
if (count % 10000 == 0) System.out.println(count + " sentences done");
}
for (Map.Entry<Dataset, ArrayList<TextAnnotation>> entry : datasetToListOfAnnotations.entrySet()) {
log.info("Added {} annotations to dataset {}", entry.getValue().size(), entry.getKey());
SentenceMapDBHandler.instance.addListOfTextAnnotation(entry.getKey(), entry.getValue());
}
System.out.println(count + " sentences done");
}
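The collect-then-bulk-insert pattern above exists because each MapDB commit is costly; gathering all annotations per dataset and committing once is far cheaper than committing per sentence. A minimal sketch of the idea (cacheAll is a hypothetical helper, not a method in this commit):

static void cacheAll(DB db, String datasetName, List<TextAnnotation> annotations) throws IOException {
    ConcurrentMap<Integer, byte[]> map =
            db.hashMap(datasetName, Serializer.INTEGER, Serializer.BYTE_ARRAY).createOrOpen();
    for (TextAnnotation ta : annotations)
        map.put(ta.getTokenizedText().hashCode(),
                SerializationHelper.serializeTextAnnotationToBytes(ta));
    db.commit(); // a single commit for the whole batch
}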
/**
 * Adds the views required for training and testing to every sentence in the
 * dataset, removing sentences whose annotation fails.
 *
 * @param dataset an iterator over the cached sentences
 * @throws IOException
 */
private static void addRequiredViews(IResetableIterator<TextAnnotation> dataset) throws IOException {
Counter<String> addedViews = new Counter<>();
@@ -204,17 +260,17 @@
} catch (Exception e) {
// Remove from dataset
log.error("Annotation failed, removing sentence from dataset");
-SentenceDBHandler.instance.removeTextAnnotation(ta);
+SentenceMapDBHandler.instance.removeTextAnnotation(ta);
continue;
}
-String parserView = ViewNames.DEPENDENCY + ":";
+String parserView = ViewNames.DEPENDENCY_HEADFINDER + ":";
String parser = properties.getDefaultParser();
if (parser.equals("Charniak")) parserView += ViewNames.PARSE_CHARNIAK;
if (parser.equals("Berkeley")) parserView += ViewNames.PARSE_BERKELEY;
if (parser.equals("Stanford")) parserView += ViewNames.PARSE_STANFORD;
if (ta.getView(parserView).getNumberOfConstituents() != ta.getSentence(0).size()) {
log.error("Head-dependency mismatch, removing sentence from dataset");
-SentenceDBHandler.instance.removeTextAnnotation(ta);
+SentenceMapDBHandler.instance.removeTextAnnotation(ta);
continue;
}
@@ -222,7 +278,7 @@
newViews.removeAll(views);
if (newViews.size() > 0) {
-SentenceDBHandler.instance.updateTextAnnotation(ta);
+SentenceMapDBHandler.instance.updateTextAnnotation(ta);
for (String s : newViews) addedViews.incrementCount(s);
}
count++;
@@ -232,6 +288,13 @@
for (String s : addedViews.items()) log.info(s + "\t" + addedViews.getCount(s));
}
/**
 * Creates the {@link SRLManager} for the given SRL type.
 *
 * @param srlType      Verb or Nom
 * @param trainingMode whether the manager will be used for training
 * @return the manager, or null if the SRL type is unknown
 * @throws Exception
 */
@CommandIgnore
public static SRLManager getManager(SRLType srlType, boolean trainingMode) throws Exception {
String viewName;
@@ -258,6 +321,12 @@
else return null;
}
/**
 * Pre-extracts the features for a specific model and SRL type.
 *
 * @param srlType_ Verb or Nom
 * @param model    Predicate, Sense, Identifier or Classifier
 * @throws Exception
 */
@CommandDescription(description = "Pre-extracts the features for a specific model and SRL type. " +
"Run this before training",
usage = "preExtract [Verb | Nom] [Predicate | Sense | Identifier | Classifier]")
@@ -312,10 +381,20 @@
Dataset datasetIdentifier = Dataset.PTBDev;
String devCacheFile = properties.getFeatureCacheFile(srlType,
modelToExtract, featureSet, defaultParser, datasetIdentifier);
log.info("Pre-extracting PTBDev for tuneIdentifier()");
preExtract(numConsumers, manager, modelToExtract, datasetIdentifier, devCacheFile, false);
}
}
/**
 * Prunes rare features from the pre-extracted feature cache.
 *
 * @param numConsumers   number of consumer threads
 * @param manager        the SRL manager
 * @param modelToExtract the model whose features are being pruned
 * @param featureCache   the unpruned feature cache
 * @param cacheFile2     the file that receives the pruned cache
 * @throws Exception
 */
private static void pruneFeatures(
int numConsumers, SRLManager manager,
Models modelToExtract, FeatureVectorCacheFile featureCache,
@@ -336,6 +415,17 @@
p1.finalize();
}
/**
 * Pre-extracts features for the given model over a cached dataset.
 *
 * @param numConsumers   number of consumer threads
 * @param manager        the SRL manager
 * @param modelToExtract the model whose features are extracted
 * @param dataset        the cached dataset to read
 * @param cacheFile      the feature-cache file to write
 * @param lockLexicon    if true, the lexicon is not extended with new features
 * @return the populated feature cache
 * @throws Exception
 */
private static FeatureVectorCacheFile preExtract(
int numConsumers, SRLManager manager, Models modelToExtract, Dataset dataset,
String cacheFile, boolean lockLexicon) throws Exception {
@@ -357,7 +447,7 @@
}
FeatureVectorCacheFile featureCache = new FeatureVectorCacheFile(cacheFile, modelToExtract, manager);
-Iterator<TextAnnotation> data = SentenceDBHandler.instance.getDataset(dataset);
+Iterator<TextAnnotation> data = SentenceMapDBHandler.instance.getDataset(dataset);
PreExtractor p = new PreExtractor(manager, data, numConsumers, modelToExtract, featureCache);
if (lockLexicon)
@@ -369,6 +459,12 @@
return featureCache;
}
/**
 * Trains a specific model and SRL type.
 *
 * @param srlType_ Verb or Nom
 * @param model_   Predicate, Sense, Identifier or Classifier
 * @throws Exception
 */
@CommandDescription(description = "Trains a specific model and SRL type",
usage = "train [Verb | Nom] [Predicate | Sense | Identifier | Classifier]")
public static void train(String srlType_, String model_) throws Exception {
@@ -388,7 +484,7 @@
String featureSet = "" + modelInfo.featureManifest.getIncludedFeatures().hashCode();
String cacheFile = properties.getPrunedFeatureCacheFile(srlType, model, featureSet, defaultParser);
-System.out.println("In train feat cahce is "+cacheFile);
+System.out.println("In train feat cache is " + cacheFile);
// NB: Tuning code for the C value has been deleted
double c = 0.01;
@@ -414,6 +510,11 @@
WeightVectorUtils.save(manager.getModelFileName(model), w);
}
/**
 * Tunes the identifier scale on the dev set.
 *
 * @param srlType_ Verb or Nom
 * @throws Exception
 */
private static void tuneIdentifier(String srlType_) throws Exception {
SRLType srlType = SRLType.valueOf(srlType_);
SRLManager manager = getManager(srlType, true);
@@ -451,6 +552,11 @@
manager.writeIdentifierScale(pair.getFirst(), pair.getSecond());
}
/**
 * Evaluates the trained models for the given SRL type.
 *
 * @param srlType_ Verb or Nom
 * @throws Exception
 */
@CommandDescription(description = "Performs evaluation.", usage = "evaluate [Verb | Nom]")
public static void evaluate(String srlType_) throws Exception {
SRLType srlType = SRLType.valueOf(srlType_);
@@ -492,7 +598,8 @@
manager.getModelInfo(Models.Classifier).loadWeightVector();
manager.getModelInfo(Models.Sense).loadWeightVector();
-IResetableIterator<TextAnnotation> dataset = SentenceDBHandler.instance.getDataset(testSet);
+SentenceMapDBHandler.instance.logDatasetSizes();
+IResetableIterator<TextAnnotation> dataset = SentenceMapDBHandler.instance.getDataset(testSet);
log.info("All models weights loaded now!");
PredicateArgumentEvaluator evaluator = new PredicateArgumentEvaluator();
@@ -508,11 +615,32 @@
assert inference != null;
PredicateArgumentView prediction = inference.getOutputView();
-evaluator.evaluate(tester, gold, prediction);
-evaluator.evaluateSense(senseTester, gold, prediction);
/*
 * Save the attributes of all gold constituents before evaluating: evaluate()
 * removes all attributes (a side effect of changes in the core utilities), so
 * we copy them here and restore them afterwards. This workaround is only
 * needed until core-utilities v3.1.4; later versions fix the issue.
 * Attributes are saved per constituent to avoid clobbering duplicate keys.
 */
List<Map<String, String>> goldAttributes = new ArrayList<>();
for (Constituent cons : gold.getConstituents()) {
    Map<String, String> attributes = new HashMap<>();
    for (String key : cons.getAttributeKeys())
        attributes.put(key, cons.getAttribute(key));
    goldAttributes.add(attributes);
}
evaluator.evaluateSense(senseTester, gold, prediction);
evaluator.evaluate(tester, gold, prediction);
// Restore the saved attributes on the gold view (same constituent order as above)
List<Constituent> goldConstituents = gold.getConstituents();
for (int i = 0; i < goldConstituents.size(); i++)
    for (Map.Entry<String, String> e : goldAttributes.get(i).entrySet())
        goldConstituents.get(i).addAttribute(e.getKey(), e.getValue());
if (outDir != null) {
writer.printPredicateArgumentView(gold, goldWriter);
writer.printPredicateArgumentView(prediction, predWriter);
}
count++;
......
package edu.illinois.cs.cogcomp.srl.caches;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.IResetableIterator;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.srl.SRLProperties;
import edu.illinois.cs.cogcomp.srl.data.Dataset;
/**
 * Common interface for the sentence caches: the H2-backed {@link SentenceDBHandler}
 * and the MapDB-backed {@link SentenceMapDBHandler}.
 * Created by Zubin on 7/2/17.
 */
public interface SentenceCacheHandler {
void initializeDatasets(String dbFile);
void addTextAnnotation(Dataset dataset, TextAnnotation ta);
void updateTextAnnotation(TextAnnotation ta);
IResetableIterator<TextAnnotation> getDataset(Dataset dataset);
boolean contains(TextAnnotation ta);
void removeTextAnnotation(TextAnnotation ta);
}
\ No newline at end of file
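One payoff of the new interface: call sites could hold a SentenceCacheHandler and pick the H2 or MapDB backend in one place. A hedged sketch (useMapDB and process are hypothetical names; Main currently calls SentenceMapDBHandler.instance directly):

SentenceCacheHandler cache = useMapDB
        ? SentenceMapDBHandler.instance
        : SentenceDBHandler.instance;
for (Dataset d : Dataset.values()) {
    IResetableIterator<TextAnnotation> it = cache.getDataset(d);
    while (it.hasNext())
        process(it.next()); // process(...) stands in for downstream work
}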
@@ -14,15 +14,15 @@ import java.sql.ResultSet;
import java.sql.SQLException;
-public class SentenceDBHandler {
+public class SentenceDBHandler implements SentenceCacheHandler {
+    public static final SentenceDBHandler instance = new SentenceDBHandler(
+            SRLProperties.getInstance().getSentenceDBFile());
    private Logger log = org.slf4j.LoggerFactory.getLogger(SentenceDBHandler.class);
    private final String dbFile;
-    public static final SentenceDBHandler instance = new SentenceDBHandler(
-            SRLProperties.getInstance().getSentenceDBFile());
private SentenceDBHandler(String dbFile) {
this.dbFile = dbFile;
@@ -65,6 +65,7 @@ public class SentenceDBHandler {
}
}
@Override
public void initializeDatasets(String dbFile) {
Connection connection = DBHelper.getConnection(dbFile);
for (Dataset d : Dataset.values()) {
@@ -91,6 +92,7 @@
}
}
@Override
public void addTextAnnotation(Dataset dataset, TextAnnotation ta) {
try {
@@ -146,6 +148,7 @@
}
}
@Override
public void updateTextAnnotation(TextAnnotation ta) {
try {
@@ -180,6 +183,7 @@
}
}
@Override
public IResetableIterator<TextAnnotation> getDataset(final Dataset dataset) {
try {
@@ -243,6 +247,7 @@
}
@Override
public boolean contains(TextAnnotation ta) {
int id = ta.getTokenizedText().hashCode();
Connection connection = DBHelper.getConnection(dbFile);
@@ -262,6 +267,7 @@
}
@Override
public void removeTextAnnotation(TextAnnotation ta) {
try {
Connection connection = DBHelper.getConnection(dbFile);
......
/**
* Created by Zubin on 7/2/17.
*/
package edu.illinois.cs.cogcomp.srl.caches;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.IResetableIterator;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.core.io.IOUtils;
import edu.illinois.cs.cogcomp.core.utilities.SerializationHelper;
import edu.illinois.cs.cogcomp.srl.SRLProperties;
import edu.illinois.cs.cogcomp.srl.data.Dataset;
import org.mapdb.DB;
import org.mapdb.DBException;
import org.mapdb.DBMaker;
import org.mapdb.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.jetbrains.annotations.NotNull;
public class SentenceMapDBHandler implements SentenceCacheHandler {
public static final SentenceMapDBHandler instance = new SentenceMapDBHandler(
SRLProperties.getInstance().getSentenceDBFile());
private DB db;
private Logger log = org.slf4j.LoggerFactory.getLogger(SentenceMapDBHandler.class);
private final String dbFile;
private SentenceMapDBHandler(String dbFile) {
this.dbFile = dbFile;
log.info("Sentence cache {} {}found", dbFile,
IOUtils.exists(dbFile) ? "" : "not ");
try {
// Enabling transactions would avoid cache corruption if the service fails,
// at the cost of slower writes (MapDB then maintains a write-ahead log):
// this.db = DBMaker.fileDB(dbFile).closeOnJvmShutdown().transactionEnable().make();
this.db = DBMaker.fileDB(dbFile).closeOnJvmShutdown().make();
}
catch (DBException e) {
e.printStackTrace();
System.err.println("mapdb couldn't instantiate db using file '" + dbFile +
"': check error and either remove lock, repair file, or delete file.");
throw e;
}
}
/**
 * MapDB requires the database to be closed at the end of operations. The
 * {@code closeOnJvmShutdown()} hook in the constructor usually handles this, but
 * this method must be called explicitly if multiple instances of the
 * {@link SentenceMapDBHandler} are used.
 */
public void close() {
db.commit();
db.close();
}
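In isolation, the lifecycle that close() completes looks like this (a generic MapDB sketch with made-up names, not code from this commit):

DB scratch = DBMaker.fileDB("scratch.db").make();
try {
    ConcurrentMap<Integer, byte[]> map =
            scratch.hashMap("tmp", Serializer.INTEGER, Serializer.BYTE_ARRAY).createOrOpen();
    map.put(1, new byte[0]);
    scratch.commit(); // commit() persists but does not close
} finally {
    scratch.close();  // releases the file handle and the store's lock
}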
/**
* Checks if the dataset is cached in the DB.
*
* @param dataset The name of the dataset (e.g. "train", "test")
* @param dbFile The name of the MapDB file
* @return Whether the dataset exists in the DB
*/
public boolean isCached(Dataset dataset, String dbFile) {
return IOUtils.exists(dbFile) && getMap(dataset.name()).size() > 0;
}
@Override
public void initializeDatasets(String dbFile) {
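        // Nothing to initialize here: getMap() creates each dataset map lazily via createOrOpen().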
}
@Override
public void addTextAnnotation(Dataset dataset, TextAnnotation ta) {
final ConcurrentMap<Integer, byte[]> data = getMap(dataset.name());
try {
data.put(ta.getTokenizedText().hashCode(), SerializationHelper.serializeTextAnnotationToBytes(ta));
} catch (IOException e) {
throw new RuntimeException(e);
}
db.commit();
}
public void addListOfTextAnnotation(Dataset dataset, ArrayList<TextAnnotation> taList) {
final ConcurrentMap<Integer, byte[]> data = getMap(dataset.name());
try {
for (TextAnnotation ta: taList) {
data.put(ta.getTokenizedText().hashCode(), SerializationHelper.serializeTextAnnotationToBytes(ta));
}
} catch (IOException e) {
throw new RuntimeException(e);
}
db.commit();
}
@Override
public void updateTextAnnotation(TextAnnotation ta) {
for (String dataset : getAllDatasets()) {
final ConcurrentMap<Integer, byte[]> data = getMap(dataset);
try {
data.replace(ta.getTokenizedText().hashCode(), SerializationHelper.serializeTextAnnotationToBytes(ta));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
db.commit();
}
@Override
public IResetableIterator<TextAnnotation> getDataset(Dataset dataset) {
final Collection<byte[]> list = getMap(dataset.name()).values();
log.info("Size of dataset {} is {}", dataset.name(), list.size());
return new IResetableIterator<TextAnnotation>() {
Iterator<byte[]> iterator = list.iterator();
@Override
public void remove() {}
@Override
public void reset() {
iterator = list.iterator();
}
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public TextAnnotation next() {
byte[] bytes = iterator.next();
return SerializationHelper.deserializeTextAnnotationFromBytes(bytes);
}
};
}
/**
 * Checks whether a TextAnnotation with the same tokenized text is present in any
 * dataset of the database -- not whether the same annotations (views) are present.
 *
 * @param ta the TextAnnotation to look up
 * @return true if some dataset contains an entry for this text
 */
@Override
public boolean contains(TextAnnotation ta) {
boolean isContained = false;
for (String dataset : getAllDatasets()) {
final ConcurrentMap<Integer, byte[]> data = getMap(dataset);
isContained |= data.containsKey(ta.getTokenizedText().hashCode());
}
return isContained;
}
public void logDatasetSizes() {
    StringBuilder logStr = new StringBuilder("Size of Datasets:\n");
    for (String dataset : getAllDatasets()) {
        logStr.append("\t").append(dataset).append(" :\t").append(getMap(dataset).size()).append("\n");
    }
    log.info(logStr.toString());
}
@Override
public void removeTextAnnotation(TextAnnotation ta) {
for (String dataset : getAllDatasets()) {
final ConcurrentMap<Integer, byte[]> data = getMap(dataset);
data.remove(ta.getTokenizedText().hashCode());
}
}
public TextAnnotation getTextAnnotation(TextAnnotation ta) {
for (String dataset : getAllDatasets()) {
final ConcurrentMap<Integer, byte[]> data = getMap(dataset);
if(data.containsKey(ta.getTokenizedText().hashCode()))
{
byte[] taData = data.get(ta.getTokenizedText().hashCode());
return SerializationHelper.deserializeTextAnnotationFromBytes(taData);
}
}
return null;
}
private ConcurrentMap<Integer, byte[]> getMap(String dataset) {
return db.hashMap(dataset, Serializer.INTEGER, Serializer.BYTE_ARRAY).createOrOpen();
}
@SuppressWarnings("ConstantConditions")
@NotNull
private Iterable<String> getAllDatasets() {
//ReentrantReadWriteLock.ReadLock lock = db.getLock$mapdb().readLock();
//lock.tryLock();
Iterable<String> allNames = db.getAllNames();
//lock.unlock();
return allNames;
}
}
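For reference, a short usage sketch of the handler (assumes an existing TextAnnotation ta; not code from this commit):

SentenceMapDBHandler cache = SentenceMapDBHandler.instance;
cache.addTextAnnotation(Dataset.PTBTrain, ta);            // keyed by the hash of the tokenized text
if (cache.contains(ta)) {
    TextAnnotation cached = cache.getTextAnnotation(ta);  // deserialized from the stored bytes
    System.out.println(cached.getTokenizedText());
}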
@@ -96,7 +96,7 @@ public class TextPreProcessor {
}
if (!ta.hasView(ViewNames.CLAUSES_STANFORD))
ta.addView(ClauseViewGenerator.STANFORD);
-if (!ta.hasView(ViewNames.DEPENDENCY + ":" + ViewNames.PARSE_STANFORD))
+if (!ta.hasView(ViewNames.DEPENDENCY_HEADFINDER + ":" + ViewNames.PARSE_STANFORD))
ta.addView(new HeadFinderDependencyViewGenerator(ViewNames.PARSE_STANFORD));
}
......
@@ -95,7 +95,9 @@ public class ILPOutput implements ILPOutputGenerator {
}
-assert label != null;
-argLabels[candidateId] = manager.getArgumentId(label);
+if (label != null)
+    argLabels[candidateId] = manager.getArgumentId(label);
+else
+    log.info("Candidate removed: label for {} was null", x.getCandidateInstances().get(candidateId));
log.debug("Prediction for {}: {}",
x.getCandidateInstances().get(candidateId), label);
@@ -126,8 +128,15 @@
-assert sense != null;
-int senseLabel = manager.getSenseId(sense);
-return senseLabel;
+if (sense != null) {
+    return manager.getSenseId(sense);
+} else {
+    // IMPORTANT: sense should never be null; there is no null sense class,
+    // so we fall back to the default sense "01".
+    log.debug("ILP could not assign any sense class. predicateId: {} lemma: {}", predicateId, lemma);
+    return manager.getSenseId("01");
+}
}
}