package example;

import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import edu.illinois.cs.cogcomp.thrift.base.Labeling;
import edu.illinois.cs.cogcomp.thrift.base.Span;
import edu.illinois.cs.cogcomp.thrift.curator.Record;
import org.apache.commons.io.IOUtils;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;

import java.util.List;

/**
 * Created by haowu on 11/7/14.
 * An ad hoc solution for reading the chunk records of Gigaword.
 */
public class GigawordChunkReader {

    /**
     * Deserializes a thrift-serialized {@link Record}. Copied here so this
     * class is self-contained.
     *
     * @param bytes the serialized record bytes
     * @return the deserialized record
     * @throws TException if thrift deserialization fails
     */
    public static Record deserializeRecord(byte[] bytes) throws TException {
        Record rec = new Record();
        TDeserializer td = new TDeserializer();
        td.deserialize(rec, bytes);
        return rec;
    }

    /** Eagerly-initialized singleton instance. */
    private static final GigawordChunkReader gigawordChunkReader = new GigawordChunkReader();

    /**
     * Returns the shared reader instance.
     *
     * @return the singleton {@code GigawordChunkReader}
     */
    public static GigawordChunkReader getGigaWordChunkReader() {
        return gigawordChunkReader;
    }

    private final AmazonS3 s3;
    private final String bucketName;

    private GigawordChunkReader() {
        // SECURITY: an AWS access key/secret pair was previously hard-coded
        // here in source. Read credentials from the environment instead
        // (set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before running).
        // The previously committed key pair must be rotated/revoked in IAM
        // regardless of this change.
        this.s3 = new AmazonS3Client(new BasicAWSCredentials(
                System.getenv("AWS_ACCESS_KEY_ID"),
                System.getenv("AWS_SECRET_ACCESS_KEY")));
        // NOTE(review): "curatpr" looks like a typo for "curator", but this
        // must match the real bucket name in S3 — do not "fix" the spelling
        // without verifying against the actual bucket.
        this.bucketName = "curatpr-processing-gigaword-chunk";
    }

    /**
     * Returns the chunk-view record for the given file, if it can be found.
     * Always pass the bare file name, with no folder info.
     *
     * @param fName file name ending in {@code "_ner"} or {@code "_dependencies"};
     *              any other suffix is rejected
     * @return the deserialized record, or {@code null} when the name is not
     *         recognized or the object is not found in either known location
     */
    public Record getChunkView(String fName) {
        // The stored objects use "_dependencies" keys; map "_ner" names onto
        // them and reject anything else.
        if (fName.endsWith("_ner")) {
            fName = fName.replace("_ner", "_dependencies");
        } else if (!fName.endsWith("_dependencies")) {
            return null;
        }

        // The records were accidentally uploaded under two different corpus
        // folders (the corpora name was not changed between uploads, and it
        // was too late to fix once discovered), so both locations are probed.
        String[] possibleLocations = {
                "result_coll/wiki-coref/" + fName,
                "result_coll/gigaword-chunk/" + fName
        };

        for (String key : possibleLocations) {
            // try-with-resources closes the S3Object and its underlying HTTP
            // content stream, which the original code leaked.
            try (S3Object s3object = this.s3.getObject(new GetObjectRequest(bucketName, key))) {
                return deserializeRecord(IOUtils.toByteArray(s3object.getObjectContent()));
            } catch (AmazonClientException ignored) {
                // Expected when the object lives in the other folder (e.g. a
                // 404 from S3) — fall through and try the next candidate.
            } catch (Exception e) {
                // Unexpected failure (I/O or thrift); report it, then still
                // try the remaining location as a best effort.
                e.printStackTrace();
            }
        }

        return null;
    }

    /** Ad hoc smoke test: fetch one known record and dump its chunk labels. */
    public static void main(String[] args) {
        Record r = GigawordChunkReader.getGigaWordChunkReader().getChunkView("afe200206_16003_ner");
        printChunks(r);
    }

    /**
     * Prints every span of the record's "chunk" labeling, one
     * {@code text<TAB><TAB>label} line per span.
     *
     * @param r the record to print; {@code null} (e.g. from a failed lookup)
     *          is reported to stderr instead of throwing an NPE
     */
    public static void printChunks(Record r) {
        if (r == null) {
            System.err.println("No record to print (lookup failed).");
            return;
        }
        Labeling chunkView = r.getLabelViews().get("chunk");
        if (chunkView == null) {
            System.err.println("Record has no \"chunk\" view.");
            return;
        }
        List<Span> spans = chunkView.getLabels();
        for (Span s : spans) {
            System.out.print(r.getRawText().substring(s.start, s.ending));
            System.out.print("\t\t");
            System.out.println(s.getLabel());
        }
    }

}