From fd3fd3738311e022afda305183fb8417d724b5cf Mon Sep 17 00:00:00 2001
From: Mosharaf Chowdhury <mosharaf@mosharaf-ubuntu.(none)>
Date: Mon, 10 Jan 2011 17:13:52 -0800
Subject: [PATCH] In-memory version of tracker+blocked shuffle checked in.

---
 conf/java-opts                                |   4 +-
 .../spark/CustomBlockedInMemoryShuffle.scala  |  27 +-
 .../TrackedCustomBlockedInMemoryShuffle.scala | 927 ++++++++++++++++++
 ...TrackedCustomBlockedLocalFileShuffle.scala |   1 +
 4 files changed, 933 insertions(+), 26 deletions(-)
 create mode 100644 src/scala/spark/TrackedCustomBlockedInMemoryShuffle.scala

diff --git a/conf/java-opts b/conf/java-opts
index bcce9f97e3..20eb4511f4 100644
--- a/conf/java-opts
+++ b/conf/java-opts
@@ -1,7 +1,7 @@
--Dspark.shuffle.class=spark.CustomParallelLocalFileShuffle
+-Dspark.shuffle.class=spark.TrackedCustomBlockedInMemoryShuffle
 -Dspark.shuffle.masterHostAddress=127.0.0.1
 -Dspark.shuffle.masterTrackerPort=22222 
--Dspark.shuffle.trackerStrategy=spark.BalanceRemainingShuffleTrackerStrategy 
+-Dspark.shuffle.trackerStrategy=spark.BalanceConnectionsShuffleTrackerStrategy 
 -Dspark.shuffle.maxRxConnections=40
 -Dspark.shuffle.maxTxConnections=120 
 -Dspark.shuffle.blockSize=4096
diff --git a/src/scala/spark/CustomBlockedInMemoryShuffle.scala b/src/scala/spark/CustomBlockedInMemoryShuffle.scala
index aae940dc8e..898ebe5af1 100644
--- a/src/scala/spark/CustomBlockedInMemoryShuffle.scala
+++ b/src/scala/spark/CustomBlockedInMemoryShuffle.scala
@@ -83,7 +83,7 @@ extends Shuffle[K, V, C] with Logging {
         var writeStartTime: Long = 0
         
         buckets(i).foreach(pair => {
-          // Open a new file if necessary
+          // Open a new stream if necessary
           if (!isDirty) {
             splitName = CustomBlockedInMemoryShuffle.getSplitName(shuffleId, 
               myIndex, i, blockNum)
@@ -98,7 +98,7 @@ extends Shuffle[K, V, C] with Logging {
           oos.writeObject(pair)
           isDirty = true
           
-          // Close the old file if has crossed the blockSize limit
+          // Close the old stream if has crossed the blockSize limit
           if (baos.size > Shuffle.BlockSize) {
             CustomBlockedInMemoryShuffle.splitsCache(splitName) = 
               baos.toByteArray
@@ -477,30 +477,9 @@ object CustomBlockedInMemoryShuffle extends Logging {
     nextShuffleId.getAndIncrement()
   }
   
-  // Returns a standard ThreadFactory except all threads are daemons
-  private def newDaemonThreadFactory: ThreadFactory = {
-    new ThreadFactory {
-      def newThread(r: Runnable): Thread = {
-        var t = Executors.defaultThreadFactory.newThread(r)
-        t.setDaemon(true)
-        return t
-      }
-    }
-  }
-
-  // Wrapper over newFixedThreadPool
-  def newDaemonFixedThreadPool(nThreads: Int): ThreadPoolExecutor = {
-    var threadPool =
-      Executors.newFixedThreadPool(nThreads).asInstanceOf[ThreadPoolExecutor]
-
-    threadPool.setThreadFactory(newDaemonThreadFactory)
-    
-    return threadPool
-  }
-  
   class ShuffleServer
   extends Thread with Logging {
-    var threadPool = newDaemonFixedThreadPool(Shuffle.MaxTxConnections)
+    var threadPool = Shuffle.newDaemonFixedThreadPool(Shuffle.MaxTxConnections)
 
     var serverSocket: ServerSocket = null
 
diff --git a/src/scala/spark/TrackedCustomBlockedInMemoryShuffle.scala b/src/scala/spark/TrackedCustomBlockedInMemoryShuffle.scala
new file mode 100644
index 0000000000..7c7281c9d7
--- /dev/null
+++ b/src/scala/spark/TrackedCustomBlockedInMemoryShuffle.scala
@@ -0,0 +1,927 @@
+package spark
+
+import java.io._
+import java.net._
+import java.util.{BitSet, Random, Timer, TimerTask, UUID}
+import java.util.concurrent.atomic.AtomicLong
+import java.util.concurrent.{LinkedBlockingQueue, Executors, ThreadPoolExecutor, ThreadFactory}
+
+import scala.collection.mutable.{ArrayBuffer, HashMap}
+
+/**
+ * An implementation of shuffle using memory served through custom server 
+ * where receivers create simultaneous connections to multiple servers by 
+ * setting the 'spark.shuffle.maxRxConnections' config option.
+ *
+ * By controlling the 'spark.shuffle.blockSize' config option one can also 
+ * control the largest block size to divide each map output into. Essentially, 
+ * instead of creating one large in-memory output for each reducer, maps create
+ * multiple smaller blocks to enable finer level of engagement.
+ *
+ * 'spark.shuffle.maxTxConnections' enforces server-side cap. Ideally, 
+ * maxTxConnections >= maxRxConnections * numReducersPerMachine
+ *
+ * 'spark.shuffle.trackerStrategy' decides which strategy to use in the tracker
+ *
+ * TODO: Add support for compression when spark.compress is set to true.
+ */
+@serializable
+class TrackedCustomBlockedInMemoryShuffle[K, V, C] 
+extends Shuffle[K, V, C] with Logging {
+  @transient var totalSplits = 0
+  @transient var hasSplits = 0
+  
+  @transient var totalBlocksInSplit: Array[Int] = null
+  @transient var hasBlocksInSplit: Array[Int] = null
+  
+  @transient var hasSplitsBitVector: BitSet = null
+  @transient var splitsInRequestBitVector: BitSet = null
+
+  @transient var receivedData: LinkedBlockingQueue[(Int, Array[Byte])] = null  
+  @transient var combiners: HashMap[K,C] = null
+  
+  // Runs the shuffle: each map task combines its input per key, serializes
+  // every output bucket into in-memory blocks held in splitsCache, and each
+  // reducer then pulls the blocks of the splits selected via the tracker and
+  // merges them with mergeCombiners. Returns the RDD of merged (K, C) pairs.
+  override def compute(input: RDD[(K, V)],
+                       numOutputSplits: Int,
+                       createCombiner: V => C,
+                       mergeValue: (C, V) => C,
+                       mergeCombiners: (C, C) => C)
+  : RDD[(K, C)] =
+  {
+    val sc = input.sparkContext
+    val shuffleId = TrackedCustomBlockedInMemoryShuffle.newShuffleId()
+    logInfo("Shuffle ID: " + shuffleId)
+
+    val splitRdd = new NumberedSplitRDD(input)
+    val numInputSplits = splitRdd.splits.size
+
+    // Run a parallel map and collect to write the intermediate data blocks,
+    // returning a list of inputSplitId -> serverUri pairs
+    val outputLocs = splitRdd.map((pair: (Int, Iterator[(K, V)])) => {
+      val myIndex = pair._1
+      val myIterator = pair._2
+      val buckets = Array.tabulate(numOutputSplits)(_ => new HashMap[K, C])
+      for ((k, v) <- myIterator) {
+        var bucketId = k.hashCode % numOutputSplits
+        if (bucketId < 0) { // Fix bucket ID if hash code was negative
+          bucketId += numOutputSplits
+        }
+        val bucket = buckets(bucketId)
+        bucket(k) = bucket.get(k) match {
+          case Some(c) => mergeValue(c, v)
+          case None => createCombiner(v)
+        }
+      }
+      
+      // Keep track of number of blocks for each output split
+      var numBlocksPerOutputSplit = Array.tabulate(numOutputSplits)(_ => 0)
+      
+      for (i <- 0 until numOutputSplits) {
+        var blockNum = 0
+        var isDirty = false
+
+        var splitName = ""
+        var baos: ByteArrayOutputStream = null
+        var oos: ObjectOutputStream = null
+        
+        var writeStartTime: Long = 0
+        
+        buckets(i).foreach(pair => {
+          // Open a new stream if necessary
+          if (!isDirty) {
+            splitName = TrackedCustomBlockedInMemoryShuffle.getSplitName(shuffleId, 
+              myIndex, i, blockNum)
+            baos = new ByteArrayOutputStream
+            oos = new ObjectOutputStream(baos)
+            writeStartTime = System.currentTimeMillis
+            logInfo("BEGIN WRITE: " + splitName)
+          }
+          
+          oos.writeObject(pair)
+          isDirty = true
+          
+          // Close the old stream if has crossed the blockSize limit
+          if (baos.size > Shuffle.BlockSize) {
+            TrackedCustomBlockedInMemoryShuffle.splitsCache(splitName) = 
+              baos.toByteArray
+            logInfo("END WRITE: " + splitName)
+            val writeTime = System.currentTimeMillis - writeStartTime
+            logInfo("Writing " + splitName + " of size " + baos.size + " bytes took " + writeTime + " millis.")
+
+            blockNum = blockNum + 1
+            isDirty = false            
+            oos.close()
+          }
+        })
+
+        if (isDirty) {
+          TrackedCustomBlockedInMemoryShuffle.splitsCache(splitName) = baos.toByteArray
+
+          logInfo("END WRITE: " + splitName)
+          val writeTime = System.currentTimeMillis - writeStartTime
+          logInfo("Writing " + splitName + " of size " + baos.size + " bytes took " + writeTime + " millis.")
+
+          blockNum = blockNum + 1
+          oos.close()
+        }
+        
+        // Store BLOCKNUM info
+        splitName = TrackedCustomBlockedInMemoryShuffle.getBlockNumOutputName(
+          shuffleId, myIndex, i)
+        baos = new ByteArrayOutputStream
+        oos = new ObjectOutputStream(baos)
+        oos.writeObject(blockNum)
+        TrackedCustomBlockedInMemoryShuffle.splitsCache(splitName) = baos.toByteArray
+
+        // Close streams
+        oos.close()
+
+        // Store number of blocks for this outputSplit
+        numBlocksPerOutputSplit(i) = blockNum
+      }
+      
+      var retVal = SplitInfo(TrackedCustomBlockedInMemoryShuffle.serverAddress, 
+        TrackedCustomBlockedInMemoryShuffle.serverPort, myIndex)
+      retVal.totalBlocksPerOutputSplit = numBlocksPerOutputSplit
+
+      (retVal)
+    }).collect()
+
+    // Start tracker
+    var shuffleTracker = new ShuffleTracker(outputLocs)
+    shuffleTracker.setDaemon(true)
+    shuffleTracker.start()
+    logInfo("ShuffleTracker started...")
+
+    // Return an RDD that does each of the merges for a given partition
+    val indexes = sc.parallelize(0 until numOutputSplits, numOutputSplits)
+    return indexes.flatMap((myId: Int) => {
+      totalSplits = outputLocs.size
+      hasSplits = 0
+      
+      totalBlocksInSplit = Array.tabulate(totalSplits)(_ => -1)
+      hasBlocksInSplit = Array.tabulate(totalSplits)(_ => 0)
+      
+      hasSplitsBitVector = new BitSet(totalSplits)
+      splitsInRequestBitVector = new BitSet(totalSplits)
+      
+      receivedData = new LinkedBlockingQueue[(Int, Array[Byte])]
+      combiners = new HashMap[K, C]
+      
+      var threadPool = Shuffle.newDaemonFixedThreadPool(
+        Shuffle.MaxRxConnections)
+      while (hasSplits < totalSplits) {
+        // Local status of hasSplitsBitVector and splitsInRequestBitVector
+        val localSplitInfo = getLocalSplitInfo(myId)
+
+        // DO NOT talk to the tracker if all the required splits are already busy
+        val hasOrWillHaveSplits = localSplitInfo.hasSplitsBitVector.cardinality
+
+        var numThreadsToCreate =
+          Math.min(totalSplits - hasOrWillHaveSplits, Shuffle.MaxRxConnections) -
+          threadPool.getActiveCount
+      
+        while (hasSplits < totalSplits && numThreadsToCreate > 0) {
+          // Receive which split to pull from the tracker
+          logInfo("Talking to tracker...")
+          val splitIndex = getTrackerSelectedSplit(myId)
+          logInfo("Got %d from tracker...".format(splitIndex))
+          
+          if (splitIndex != -1) {
+            val selectedSplitInfo = outputLocs(splitIndex)
+            val requestSplit = 
+              "%d/%d/%d".format(shuffleId, selectedSplitInfo.splitId, myId)
+            // Mark splitIndex as in transit BEFORE starting the client. The
+            // client clears the bit when it finishes, so setting it afterwards
+            // could leave a stale bit behind if the client fails immediately.
+            splitsInRequestBitVector.synchronized {
+              splitsInRequestBitVector.set(splitIndex)
+            }
+            threadPool.execute(new ShuffleClient(splitIndex, selectedSplitInfo, 
+              requestSplit, myId))
+          } else {
+            // Tracker replied back with a NO. Sleep for a while.
+            Thread.sleep(Shuffle.MinKnockInterval)
+          }
+          
+          numThreadsToCreate = numThreadsToCreate - 1
+        }
+        
+        // Sleep for a while before creating new threads
+        Thread.sleep(Shuffle.MinKnockInterval)
+      }
+
+      threadPool.shutdown()
+
+      // Start consumer
+      // TODO: Consumption is delayed until everything has been received. 
+      // Otherwise it interferes with network performance
+      var shuffleConsumer = new ShuffleConsumer(mergeCombiners)
+      shuffleConsumer.setDaemon(true)
+      shuffleConsumer.start()
+      logInfo("ShuffleConsumer started...")
+
+      // Don't return until consumption is finished. Joining the consumer,
+      // rather than polling receivedData.size, also waits for the block it
+      // has already taken off the queue but not yet merged; otherwise
+      // combiners could be returned while it is still being updated.
+      shuffleConsumer.join()
+      
+      combiners
+    })
+  }
+  
+  // Snapshots this reducer's progress into a SplitInfo. The reported bit
+  // vector covers splits already received OR currently being requested.
+  private def getLocalSplitInfo(myId: Int): SplitInfo = {
+    val snapshot = SplitInfo(InetAddress.getLocalHost.getHostAddress, 
+      SplitInfo.UnusedParam, myId)
+    snapshot.hasSplits = hasSplits
+
+    // Copy the received-splits bit vector while holding its monitor
+    val receivedBits = hasSplitsBitVector.synchronized {
+      hasSplitsBitVector.clone.asInstanceOf[BitSet]
+    }
+    snapshot.hasSplitsBitVector = receivedBits
+
+    // Copy the per-split block counts into hasBlocksPerInputSplit
+    val blockCounts = hasBlocksInSplit.synchronized {
+      hasBlocksInSplit.clone.asInstanceOf[Array[Int]]
+    }
+    snapshot.hasBlocksPerInputSplit = blockCounts
+
+    // Fold the splits currently in request into the reported bit vector
+    splitsInRequestBitVector.synchronized {
+      snapshot.hasSplitsBitVector.or(splitsInRequestBitVector)
+    }
+
+    snapshot
+  }  
+
+  // Returns a random split that is neither received nor in request, or -1.
+  def selectRandomSplit: Int = {
+    // Snapshot each bit vector under its own monitor, the lock all writers
+    // use; locking `this` would not exclude any of them.
+    val received = hasSplitsBitVector.synchronized {
+      hasSplitsBitVector.clone.asInstanceOf[BitSet]
+    }
+    val inRequest = splitsInRequestBitVector.synchronized {
+      splitsInRequestBitVector.clone.asInstanceOf[BitSet]
+    }
+    val requiredSplits = (0 until totalSplits).filter(i => !received.get(i) && !inRequest.get(i))
+    if (requiredSplits.size > 0) {
+      requiredSplits(TrackedCustomBlockedInMemoryShuffle.ranGen.nextInt(
+        requiredSplits.size))
+    } else {
+      -1
+    }
+  }
+  
+  // Talks to the tracker and receives instruction
+  private def getTrackerSelectedSplit(myId: Int): Int = {
+    // Local status of hasSplitsBitVector and splitsInRequestBitVector
+    val localSplitInfo = getLocalSplitInfo(myId)
+
+    // DO NOT talk to the tracker if all the required splits are already busy
+    if (localSplitInfo.hasSplitsBitVector.cardinality == totalSplits) {
+      return -1
+    }
+
+    val clientSocketToTracker = new Socket(Shuffle.MasterHostAddress, 
+      Shuffle.MasterTrackerPort)
+    val oosTracker =
+      new ObjectOutputStream(clientSocketToTracker.getOutputStream)
+    oosTracker.flush()
+    val oisTracker =
+      new ObjectInputStream(clientSocketToTracker.getInputStream)
+
+    var selectedSplitIndex = -1
+
+    // Setup the timeout mechanism
+    var timeOutTask = new TimerTask {
+      override def run: Unit = {
+        logInfo("Waited enough for tracker response... Take random response...")
+  
+        // sockets will be closed  in finally
+        
+        // TODO: Selecting randomly here. Tracker won't know about it and get an
+        // assertion failure when this thread leaves
+        
+        selectedSplitIndex = selectRandomSplit
+      }
+    }
+    
+    var timeOutTimer = new Timer
+    // TODO: Which timeout to use?
+    timeOutTimer.schedule(timeOutTask, Shuffle.MinKnockInterval)
+
+    try {
+      // Send intention
+      oosTracker.writeObject(
+        TrackedCustomBlockedInMemoryShuffle.ReducerEntering)
+      oosTracker.flush()
+      
+      // Send what this reducer has
+      oosTracker.writeObject(localSplitInfo)
+      oosTracker.flush()
+      
+      // Receive reply from the tracker
+      selectedSplitIndex = oisTracker.readObject.asInstanceOf[Int]
+    } catch {
+      case e: Exception => {
+        logInfo("getTrackerSelectedSplit had a " + e)
+      }
+    } finally {
+      // Turn the timer OFF on every exit path; a failed exchange must not
+      // leave the timeout task (and its timer thread) pending
+      timeOutTimer.cancel()
+      oisTracker.close()
+      oosTracker.close()
+      clientSocketToTracker.close()
+    }
+    
+    return selectedSplitIndex
+  }
+  
+  class ShuffleTracker(outputLocs: Array[SplitInfo])
+  extends Thread with Logging {
+    var threadPool = Shuffle.newDaemonCachedThreadPool
+    var serverSocket: ServerSocket = null
+
+    // Create trackerStrategy object
+    val trackerStrategyClass = System.getProperty(
+      "spark.shuffle.trackerStrategy", 
+      "spark.BalanceConnectionsShuffleTrackerStrategy")
+    
+    val trackerStrategy =
+      Class.forName(trackerStrategyClass).newInstance().asInstanceOf[ShuffleTrackerStrategy]
+      
+    // Must initialize here by supplying the outputLocs param
+    // TODO: This could be avoided by directly passing it to the constructor
+    trackerStrategy.initialize(outputLocs)
+
+    // Accepts reducer connections forever, serving each on the thread pool:
+    // entering reducers are assigned a split, leaving ones report their stats.
+    override def run: Unit = {
+      serverSocket = new ServerSocket(Shuffle.MasterTrackerPort)
+      logInfo("ShuffleTracker" + serverSocket)
+      try {
+        while (true) {
+          var clientSocket: Socket = null
+          try {
+            clientSocket = serverSocket.accept()
+          } catch {
+            case e: Exception => {
+              logInfo("ShuffleTracker had a " + e)
+            }
+          }
+
+          if (clientSocket != null) {
+            try {
+              threadPool.execute(new Thread {
+                override def run: Unit = {
+                  val oos = new ObjectOutputStream(clientSocket.getOutputStream)
+                  oos.flush()
+                  val ois = new ObjectInputStream(clientSocket.getInputStream)
+                  try {
+                    // Receive intention
+                    val reducerIntention = ois.readObject.asInstanceOf[Int]
+                    
+                    if (reducerIntention == 
+                      TrackedCustomBlockedInMemoryShuffle.ReducerEntering) {
+                      // Receive what the reducer has
+                      val reducerSplitInfo = 
+                        ois.readObject.asInstanceOf[SplitInfo]
+                      
+                      // Select split and update stats if necessary
+                      var selectedSplitIndex = -1
+                      trackerStrategy.synchronized {
+                        selectedSplitIndex = trackerStrategy.selectSplit(
+                          reducerSplitInfo)
+                      }
+                      
+                      // Send reply back
+                      oos.writeObject(selectedSplitIndex)
+                      oos.flush()
+                      
+                      // Update internal stats, only if receiver got the reply
+                      trackerStrategy.synchronized {
+                        trackerStrategy.AddReducerToSplit(reducerSplitInfo, 
+                          selectedSplitIndex)
+                      }
+                    }
+                    else if (reducerIntention == 
+                      TrackedCustomBlockedInMemoryShuffle.ReducerLeaving) {
+                      val reducerSplitInfo = 
+                        ois.readObject.asInstanceOf[SplitInfo]
+
+                      // Receive reception stats: how many blocks the reducer 
+                      // read in how much time and from where
+                      val receptionStat = 
+                        ois.readObject.asInstanceOf[ReceptionStats]
+                      
+                      // Update stats
+                      trackerStrategy.synchronized {
+                        trackerStrategy.deleteReducerFrom(reducerSplitInfo, 
+                          receptionStat)
+                      }
+                      // Send ACK
+                      oos.writeObject(receptionStat.serverSplitIndex)
+                      oos.flush()
+                    }
+                    else {
+                      throw new SparkException("Undefined reducerIntention")
+                    }
+                  } catch {
+                    // EOFException is expected to happen because receiver can 
+                    // break connection due to timeout and pick random instead
+                    case eofe: java.io.EOFException => { }
+                    case e: Exception => {
+                      logInfo("ShuffleTracker had a " + e)
+                    }
+                  } finally {
+                    ois.close()
+                    oos.close()
+                    clientSocket.close()
+                  }
+                }
+              })
+            } catch {
+              // In failure, close socket here; else, client thread will close
+              case ioe: IOException => {
+                clientSocket.close()
+              }
+            }
+          }
+        }
+      } finally {
+        serverSocket.close()
+        // Shut the pool down here: the loop above only exits by exception,
+        // so a statement placed after this try/finally would never run
+        threadPool.shutdown()
+      }
+    }  
+  }  
+
+  // Drains receivedData, merging each block's (K, C) pairs into combiners.
+  class ShuffleConsumer(mergeCombiners: (C, C) => C)
+  extends Thread with Logging {   
+    override def run: Unit = {
+      // Run until the queue of received blocks is empty
+      while (receivedData.size > 0) {
+        var recvByteArray: Array[Byte] = null
+        try {
+          recvByteArray = receivedData.take()._2
+        } catch {
+          case e: Exception => {
+            logInfo("Exception during taking data from receivedData")
+          }
+        }      
+        // A failed take() leaves nothing to decode. Skip this pass instead of
+        // handing null to ByteArrayInputStream, which would kill the thread.
+        if (recvByteArray != null) {
+          val inputStream = 
+            new ObjectInputStream(new ByteArrayInputStream(recvByteArray))
+          try {
+            while (true) {
+              val (k, c) = inputStream.readObject.asInstanceOf[(K, C)]
+              combiners(k) = combiners.get(k) match {
+                case Some(oldC) => mergeCombiners(oldC, c)
+                case None => c
+              }
+            }
+          } catch {
+            case e: EOFException => { }
+          } finally {
+            inputStream.close()
+          }
+        }
+      }
+    }
+  }
+
+  class ShuffleClient(splitIndex: Int, serversplitInfo: SplitInfo, 
+    requestSplit: String, myId: Int)
+  extends Thread with Logging {
+    private var peerSocketToSource: Socket = null
+    private var oosSource: ObjectOutputStream = null
+    private var oisSource: ObjectInputStream = null
+
+    private var receptionSucceeded = false
+
+    // Make sure that multiple messages don't go to the tracker
+    private var alreadySentLeavingNotification = false
+
+    // Keep track of bytes received and time spent
+    private var numBytesReceived = 0
+    private var totalTimeSpent = 0
+
+    // Pulls every missing block of split splitIndex from its mapper and queues
+    // each block in receivedData; always runs cleanUp() on the way out.
+    override def run: Unit = {
+      // Setup the timeout mechanism
+      var timeOutTask = new TimerTask {
+        override def run: Unit = {
+          cleanUp()
+        }
+      }
+      var timeOutTimer = new Timer
+      timeOutTimer.schedule(timeOutTask, Shuffle.MaxKnockInterval)
+      
+      try {
+        // Everything will break if BLOCKNUM is not correctly received
+        // First get BLOCKNUM file if totalBlocksInSplit(splitIndex) is unknown
+        peerSocketToSource = new Socket(
+          serversplitInfo.hostAddress, serversplitInfo.listenPort)
+        oosSource =
+          new ObjectOutputStream(peerSocketToSource.getOutputStream)
+        oosSource.flush()
+        var isSource = peerSocketToSource.getInputStream
+        oisSource = new ObjectInputStream(isSource)
+        // Send path information
+        oosSource.writeObject(requestSplit)   
+        
+        // TODO: Can be optimized. No need to do it everytime.
+        // Receive BLOCKNUM
+        totalBlocksInSplit(splitIndex) = oisSource.readObject.asInstanceOf[Int]
+
+        // Turn the timer OFF, if the sender responds before timeout
+        timeOutTimer.cancel()
+        
+        while (hasBlocksInSplit(splitIndex) < totalBlocksInSplit(splitIndex)) {
+          // Set receptionSucceeded to false before trying for each block
+          receptionSucceeded = false
+
+          // Request specific block
+          oosSource.writeObject(hasBlocksInSplit(splitIndex))
+          
+          // Good to go. First, receive the length of the requested file
+          var requestedFileLen = oisSource.readObject.asInstanceOf[Int]
+          logInfo("Received requestedFileLen = " + requestedFileLen)
+
+          // Create a temp variable to be used in different places
+          val requestPath = "http://%s:%d/shuffle/%s-%d".format(
+            serversplitInfo.hostAddress, serversplitInfo.listenPort, requestSplit, 
+            hasBlocksInSplit(splitIndex))
+
+          // Receive the file
+          if (requestedFileLen != -1) {
+            val readStartTime = System.currentTimeMillis
+            logInfo("BEGIN READ: " + requestPath)
+
+            // Receive data in an Array[Byte]
+            var recvByteArray = new Array[Byte](requestedFileLen)
+            var alreadyRead = 0
+            var bytesRead = 0
+            
+            while (alreadyRead != requestedFileLen) {
+              bytesRead = isSource.read(recvByteArray, alreadyRead, 
+                requestedFileLen - alreadyRead)
+              // read() returns -1 at end of stream; looping on it would spin forever
+              if (bytesRead < 0) throw new EOFException("Sender closed " + requestPath)
+              alreadyRead = alreadyRead + bytesRead
+            } 
+            
+            // Make it available to the consumer
+            try {
+              receivedData.put((splitIndex, recvByteArray))
+            } catch {
+              case e: Exception => {
+                logInfo("Exception during putting data into receivedData")
+              }
+            }
+            // TODO: Updating stats before consumption is completed
+            hasBlocksInSplit(splitIndex) = hasBlocksInSplit(splitIndex) + 1
+            // Split has been received only if all the blocks have been received.
+            // hasSplits is bumped under the lock: clients finish concurrently.
+            if (hasBlocksInSplit(splitIndex) == totalBlocksInSplit(splitIndex)) {
+              hasSplitsBitVector.synchronized {
+                hasSplitsBitVector.set(splitIndex)
+                hasSplits += 1
+              }
+            }
+            receptionSucceeded = true
+
+            logInfo("END READ: " + requestPath)
+            val readTime = System.currentTimeMillis - readStartTime
+            logInfo("Reading " + requestPath + " took " + readTime + " millis.")
+            
+            // Update stats
+            numBytesReceived = numBytesReceived + requestedFileLen
+            totalTimeSpent = totalTimeSpent + readTime.toInt
+          } else {
+              throw new SparkException("ShuffleServer " + serversplitInfo.hostAddress + " does not have " + requestSplit)
+          }
+        }
+      } catch {
+        // EOFException is expected to happen because sender can break
+        // connection due to timeout
+        case eofe: java.io.EOFException => { }
+        case e: Exception => {
+          logInfo("ShuffleClient had a " + e)
+        }
+      } finally {
+        splitsInRequestBitVector.synchronized {
+          splitsInRequestBitVector.set(splitIndex, false)
+        }
+        // Also stop the timer when BLOCKNUM never arrived (cancel is idempotent)
+        timeOutTimer.cancel()
+        cleanUp()
+      }
+    }
+    
+    // Connect to the tracker and update its stats
+    private def sendLeavingNotification(): Unit = synchronized {
+      if (!alreadySentLeavingNotification) {
+        val clientSocketToTracker = new Socket(Shuffle.MasterHostAddress, 
+          Shuffle.MasterTrackerPort)
+        val oosTracker =
+          new ObjectOutputStream(clientSocketToTracker.getOutputStream)
+        oosTracker.flush()
+        val oisTracker =
+          new ObjectInputStream(clientSocketToTracker.getInputStream)
+
+        try {
+          // Send intention
+          oosTracker.writeObject(
+            TrackedCustomBlockedInMemoryShuffle.ReducerLeaving)
+          oosTracker.flush()
+          
+          // Send reducerSplitInfo
+          oosTracker.writeObject(getLocalSplitInfo(myId))
+          oosTracker.flush()
+          
+          // Send reception stats
+          oosTracker.writeObject(ReceptionStats(
+            numBytesReceived, totalTimeSpent, splitIndex))
+          oosTracker.flush()
+          
+          // Receive ACK. No need to do anything with that
+          oisTracker.readObject.asInstanceOf[Int]
+ 
+          // Now update sentLeavingNotifacation
+          alreadySentLeavingNotification = true
+        } catch {
+          case e: Exception => {
+            logInfo("sendLeavingNotification had a " + e)
+          }
+        } finally {
+          oisTracker.close()
+          oosTracker.close()
+          clientSocketToTracker.close()
+        }
+      }
+    }
+    
+    // Notifies the tracker that this client is leaving, then closes the
+    // streams and socket to the mapper. Called from run's finally block and
+    // from the timeout task.
+    private def cleanUp(): Unit = {
+      try {
+        // Update tracker stats first
+        sendLeavingNotification()
+      } finally {
+        // Clean up the connections to the mapper even when the tracker
+        // notification throws (e.g. the tracker cannot be reached)
+        if (oisSource != null) oisSource.close()
+        if (oosSource != null) oosSource.close()
+        if (peerSocketToSource != null) peerSocketToSource.close()
+      }
+      
+      logInfo("Leaving client")
+    }
+  }     
+}
+
+object TrackedCustomBlockedInMemoryShuffle extends Logging {
+  // Tracker communication constants
+  val ReducerEntering = 0
+  val ReducerLeaving = 1
+
+  // Cache for keeping the splits around
+  val splitsCache = new HashMap[String, Array[Byte]]
+
+  private var initialized = false
+  private var nextShuffleId = new AtomicLong(0)
+
+  // Variables initialized by initializeIfNeeded()
+  private var shuffleDir: File = null
+
+  private var shuffleServer: ShuffleServer = null
+  private var serverAddress = InetAddress.getLocalHost.getHostAddress
+  private var serverPort: Int = -1
+  
+  // Random number generator
+  var ranGen = new Random
+  
+  private def initializeIfNeeded() = synchronized {
+    if (!initialized) {
+      // TODO: localDir should be created by some mechanism common to Spark
+      // so that it can be shared among shuffle, broadcast, etc
+      val localDirRoot = System.getProperty("spark.local.dir", "/tmp")
+      var tries = 0
+      var foundLocalDir = false
+      var localDir: File = null
+      var localDirUuid: UUID = null
+      while (!foundLocalDir && tries < 10) {
+        tries += 1
+        try {
+          localDirUuid = UUID.randomUUID
+          localDir = new File(localDirRoot, "spark-local-" + localDirUuid)
+          if (!localDir.exists) {
+            localDir.mkdirs()
+            foundLocalDir = true
+          }
+        } catch {
+          case e: Exception =>
+            logWarning("Attempt " + tries + " to create local dir failed", e)
+        }
+      }
+      if (!foundLocalDir) {
+        logError("Failed 10 attempts to create local dir in " + localDirRoot)
+        System.exit(1)
+      }
+      shuffleDir = new File(localDir, "shuffle")
+      shuffleDir.mkdirs()
+      logInfo("Shuffle dir: " + shuffleDir)
+      
+      // Create and start the shuffleServer      
+      shuffleServer = new ShuffleServer
+      shuffleServer.setDaemon(true)
+      shuffleServer.start()
+      logInfo("ShuffleServer started...")
+
+      initialized = true
+    }
+  }
+  
+  def getSplitName(shuffleId: Long, inputId: Int, outputId: Int, 
+    blockId: Int): String = {
+    initializeIfNeeded()
+    // Adding shuffleDir is unnecessary. Added to keep the parsers working
+    return "%s/%d/%d/%d-%d".format(shuffleDir, shuffleId, inputId, outputId, 
+      blockId)
+  }
+
+  def getBlockNumOutputName(shuffleId: Long, inputId: Int, 
+    outputId: Int): String = {
+    initializeIfNeeded()
+    return "%s/%d/%d/%d-BLOCKNUM".format(shuffleDir, shuffleId, inputId, 
+      outputId)
+  }
+
+  def newShuffleId(): Long = {
+    nextShuffleId.getAndIncrement()
+  }
+  
+  class ShuffleServer
+  extends Thread with Logging {
+    var threadPool = Shuffle.newDaemonFixedThreadPool(Shuffle.MaxTxConnections)
+
+    var serverSocket: ServerSocket = null
+
+    // Binds an ephemeral port, publishes it in serverPort, then hands every
+    // accepted connection to a ShuffleServerThread on the thread pool.
+    override def run: Unit = {
+      serverSocket = new ServerSocket(0)
+      serverPort = serverSocket.getLocalPort
+      logInfo("ShuffleServer started with " + serverSocket)
+      logInfo("Local URI: http://" + serverAddress + ":" + serverPort)
+      try {
+        while (true) {
+          var clientSocket: Socket = null
+          try {
+            clientSocket = serverSocket.accept()
+          } catch {
+            case e: Exception => { }
+          }
+          if (clientSocket != null) {
+            logInfo("Serve: Accepted new client connection:" + clientSocket)
+            try {
+              threadPool.execute(new ShuffleServerThread(clientSocket))
+            } catch {
+              // In failure, close socket here; else, the thread will close it
+              case ioe: IOException => {
+                clientSocket.close()
+              }
+            }
+          }
+        }
+      } finally {
+        if (serverSocket != null) {
+          logInfo("ShuffleServer now stopping...")
+          serverSocket.close()
+        }
+        // Shutdown the thread pool here; code after this block is unreachable
+        threadPool.shutdown()
+      }
+    }
+    
+    class ShuffleServerThread(val clientSocket: Socket)
+    extends Thread with Logging {
+      private val os = clientSocket.getOutputStream.asInstanceOf[OutputStream]
+      os.flush()
+      private val bos = new BufferedOutputStream(os)
+      bos.flush()
+      private val oos = new ObjectOutputStream(os)
+      oos.flush()
+      private val ois = new ObjectInputStream(clientSocket.getInputStream)
+
+      logInfo("new ShuffleServerThread is running")
+      
+      override def run: Unit = {
+        try {
+          // Receive basic path information
+          var requestedSplitBase = ois.readObject.asInstanceOf[String]
+          
+          logInfo("requestedSplitBase: " + requestedSplitBase)
+          
+          // Read BLOCKNUM and send back the total number of blocks
+          val blockNumName = "%s/%s-BLOCKNUM".format(shuffleDir, 
+            requestedSplitBase)
+            
+          val blockNumIn = new ObjectInputStream(new ByteArrayInputStream(
+            TrackedCustomBlockedInMemoryShuffle.splitsCache(blockNumName)))
+          val BLOCKNUM = blockNumIn.readObject.asInstanceOf[Int]
+          blockNumIn.close()
+          
+          oos.writeObject(BLOCKNUM)
+          oos.flush()
+          
+          val startTime = System.currentTimeMillis
+          var curTime = startTime
+          var keepSending = true
+          var numBlocksToSend = Shuffle.MaxChatBlocks
+          
+          while (keepSending && numBlocksToSend > 0) {
+            // Receive specific block request
+            val blockId = ois.readObject.asInstanceOf[Int]
+            
+            // Ready to send
+            var requestedSplit = shuffleDir + "/" + requestedSplitBase + "-" + blockId
+            
+            // Send the length of the requestedSplit to let the receiver know that 
+            // transfer is about to start
+            // In the case of receiver timeout and connection close, this will
+            // throw a java.net.SocketException: Broken pipe
+            var requestedSplitLen = -1
+            
+            try {
+              requestedSplitLen =
+                TrackedCustomBlockedInMemoryShuffle.splitsCache(requestedSplit).length
+            } catch {
+              case e: Exception => { }
+            }
+
+            oos.writeObject(requestedSplitLen)
+            oos.flush()
+            
+            logInfo("requestedSplitLen = " + requestedSplitLen)
+
+            // Read and send the requested file
+            if (requestedSplitLen != -1) {
+              // Send
+              bos.write(TrackedCustomBlockedInMemoryShuffle.splitsCache(requestedSplit),
+                0, requestedSplitLen)
+              bos.flush()
+
+              // Update loop variables
+              numBlocksToSend = numBlocksToSend - 1
+              
+              curTime = System.currentTimeMillis
+              // Revoke sending only if there is anyone waiting in the queue
+              // TODO: Turning OFF the optimization so that reducers go back to
+              // tracker get advice
+              if (curTime - startTime >= Shuffle.MaxChatTime /* &&
+                  threadPool.getQueue.size > 0 */) {
+                keepSending = false
+              }
+            } else {
+              // Close the connection
+            }
+          }
+        } catch {
+          // If something went wrong, e.g., the worker at the other end died etc
+          // then close everything up
+          // Exception can happen if the receiver stops receiving
+          // EOFException is expected to happen because receiver can break
+          // connection as soon as it has all the blocks
+          case eofe: java.io.EOFException => { }
+          case e: Exception => {
+            logInfo("ShuffleServerThread had a " + e)
+          }
+        } finally {
+          logInfo("ShuffleServerThread is closing streams and sockets")
+          ois.close()
+          // TODO: Following can cause "java.net.SocketException: Socket closed"
+          oos.close()
+          bos.close()
+          clientSocket.close()
+        }
+      }
+    }
+  }  
+}
diff --git a/src/scala/spark/TrackedCustomBlockedLocalFileShuffle.scala b/src/scala/spark/TrackedCustomBlockedLocalFileShuffle.scala
index a1032368dd..798aba9598 100644
--- a/src/scala/spark/TrackedCustomBlockedLocalFileShuffle.scala
+++ b/src/scala/spark/TrackedCustomBlockedLocalFileShuffle.scala
@@ -836,6 +836,7 @@ object TrackedCustomBlockedLocalFileShuffle extends Logging {
           blockNumIn.close()
           
           oos.writeObject(BLOCKNUM)
+          oos.flush()
           
           val startTime = System.currentTimeMillis
           var curTime = startTime
-- 
GitLab