Skip to content
Snippets Groups Projects
Commit 2dc04808 authored by Ryan Blue's avatar Ryan Blue Committed by Reynold Xin
Browse files

[SPARK-17532] Add lock debugging info to thread dumps.

## What changes were proposed in this pull request?

This adds information to the web UI thread dump page about the JVM locks
held by threads and the locks that threads are blocked waiting to
acquire. This should help find cases where lock contention is causing
Spark applications to run slowly.
## How was this patch tested?

Tested by applying this patch and viewing the change in the web UI.

![thread-lock-info](https://cloud.githubusercontent.com/assets/87915/18493057/6e5da870-79c3-11e6-8c20-f54c18a37544.png)

Additions:
- A "Thread Locking" column with the locks held by the thread or that are blocking the thread
- Links from the a blocked thread to the thread holding the lock
- Stack frames show where threads are inside `synchronized` blocks, "holding Monitor(...)"

Author: Ryan Blue <blue@apache.org>

Closes #15088 from rdblue/SPARK-17532-add-thread-lock-info.
parent 85c5424d
No related branches found
No related tags found
No related merge requests found
...@@ -36,7 +36,7 @@ function toggleThreadStackTrace(threadId, forceAdd) { ...@@ -36,7 +36,7 @@ function toggleThreadStackTrace(threadId, forceAdd) {
if (stackTrace.length == 0) { if (stackTrace.length == 0) {
var stackTraceText = $('#' + threadId + "_td_stacktrace").html() var stackTraceText = $('#' + threadId + "_td_stacktrace").html()
var threadCell = $("#thread_" + threadId + "_tr") var threadCell = $("#thread_" + threadId + "_tr")
threadCell.after("<tr id=\"" + threadId +"_stacktrace\" class=\"accordion-body\"><td colspan=\"3\"><pre>" + threadCell.after("<tr id=\"" + threadId +"_stacktrace\" class=\"accordion-body\"><td colspan=\"4\"><pre>" +
stackTraceText + "</pre></td></tr>") stackTraceText + "</pre></td></tr>")
} else { } else {
if (!forceAdd) { if (!forceAdd) {
...@@ -73,6 +73,7 @@ function onMouseOverAndOut(threadId) { ...@@ -73,6 +73,7 @@ function onMouseOverAndOut(threadId) {
$("#" + threadId + "_td_id").toggleClass("threaddump-td-mouseover"); $("#" + threadId + "_td_id").toggleClass("threaddump-td-mouseover");
$("#" + threadId + "_td_name").toggleClass("threaddump-td-mouseover"); $("#" + threadId + "_td_name").toggleClass("threaddump-td-mouseover");
$("#" + threadId + "_td_state").toggleClass("threaddump-td-mouseover"); $("#" + threadId + "_td_state").toggleClass("threaddump-td-mouseover");
$("#" + threadId + "_td_locking").toggleClass("threaddump-td-mouseover");
} }
function onSearchStringChange() { function onSearchStringChange() {
......
...@@ -48,6 +48,16 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage ...@@ -48,6 +48,16 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage
} }
}.map { thread => }.map { thread =>
val threadId = thread.threadId val threadId = thread.threadId
val blockedBy = thread.blockedByThreadId match {
case Some(blockedByThreadId) =>
<div>
Blocked by <a href={s"#${thread.blockedByThreadId}_td_id"}>
Thread {thread.blockedByThreadId} {thread.blockedByLock}</a>
</div>
case None => Text("")
}
val heldLocks = thread.holdingLocks.mkString(", ")
<tr id={s"thread_${threadId}_tr"} class="accordion-heading" <tr id={s"thread_${threadId}_tr"} class="accordion-heading"
onclick={s"toggleThreadStackTrace($threadId, false)"} onclick={s"toggleThreadStackTrace($threadId, false)"}
onmouseover={s"onMouseOverAndOut($threadId)"} onmouseover={s"onMouseOverAndOut($threadId)"}
...@@ -55,6 +65,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage ...@@ -55,6 +65,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage
<td id={s"${threadId}_td_id"}>{threadId}</td> <td id={s"${threadId}_td_id"}>{threadId}</td>
<td id={s"${threadId}_td_name"}>{thread.threadName}</td> <td id={s"${threadId}_td_name"}>{thread.threadName}</td>
<td id={s"${threadId}_td_state"}>{thread.threadState}</td> <td id={s"${threadId}_td_state"}>{thread.threadState}</td>
<td id={s"${threadId}_td_locking"}>{blockedBy}{heldLocks}</td>
<td id={s"${threadId}_td_stacktrace"} class="hidden">{thread.stackTrace}</td> <td id={s"${threadId}_td_stacktrace"} class="hidden">{thread.stackTrace}</td>
</tr> </tr>
} }
...@@ -86,6 +97,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage ...@@ -86,6 +97,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage
<th onClick="collapseAllThreadStackTrace(false)">Thread ID</th> <th onClick="collapseAllThreadStackTrace(false)">Thread ID</th>
<th onClick="collapseAllThreadStackTrace(false)">Thread Name</th> <th onClick="collapseAllThreadStackTrace(false)">Thread Name</th>
<th onClick="collapseAllThreadStackTrace(false)">Thread State</th> <th onClick="collapseAllThreadStackTrace(false)">Thread State</th>
<th onClick="collapseAllThreadStackTrace(false)">Thread Locks</th>
</thead> </thead>
<tbody>{dumpRows}</tbody> <tbody>{dumpRows}</tbody>
</table> </table>
......
...@@ -24,4 +24,8 @@ private[spark] case class ThreadStackTrace( ...@@ -24,4 +24,8 @@ private[spark] case class ThreadStackTrace(
threadId: Long, threadId: Long,
threadName: String, threadName: String,
threadState: Thread.State, threadState: Thread.State,
stackTrace: String) stackTrace: String,
blockedByThreadId: Option[Long],
blockedByLock: String,
holdingLocks: Seq[String])
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
package org.apache.spark.util package org.apache.spark.util
import java.io._ import java.io._
import java.lang.management.ManagementFactory import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo}
import java.net._ import java.net._
import java.nio.ByteBuffer import java.nio.ByteBuffer
import java.nio.channels.Channels import java.nio.channels.Channels
...@@ -2096,15 +2096,41 @@ private[spark] object Utils extends Logging { ...@@ -2096,15 +2096,41 @@ private[spark] object Utils extends Logging {
} }
} }
private implicit class Lock(lock: LockInfo) {
def lockString: String = {
lock match {
case monitor: MonitorInfo =>
s"Monitor(${lock.getClassName}@${lock.getIdentityHashCode}})"
case _ =>
s"Lock(${lock.getClassName}@${lock.getIdentityHashCode}})"
}
}
}
/** Return a thread dump of all threads' stacktraces. Used to capture dumps for the web UI */ /** Return a thread dump of all threads' stacktraces. Used to capture dumps for the web UI */
def getThreadDump(): Array[ThreadStackTrace] = { def getThreadDump(): Array[ThreadStackTrace] = {
// We need to filter out null values here because dumpAllThreads() may return null array // We need to filter out null values here because dumpAllThreads() may return null array
// elements for threads that are dead / don't exist. // elements for threads that are dead / don't exist.
val threadInfos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, true).filter(_ != null) val threadInfos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, true).filter(_ != null)
threadInfos.sortBy(_.getThreadId).map { case threadInfo => threadInfos.sortBy(_.getThreadId).map { case threadInfo =>
val stackTrace = threadInfo.getStackTrace.map(_.toString).mkString("\n") val monitors = threadInfo.getLockedMonitors.map(m => m.getLockedStackFrame -> m).toMap
ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName, val stackTrace = threadInfo.getStackTrace.map { frame =>
threadInfo.getThreadState, stackTrace) monitors.get(frame) match {
case Some(monitor) =>
monitor.getLockedStackFrame.toString + s" => holding ${monitor.lockString}"
case None =>
frame.toString
}
}.mkString("\n")
// use a set to dedup re-entrant locks that are held at multiple places
val heldLocks = (threadInfo.getLockedSynchronizers.map(_.lockString)
++ threadInfo.getLockedMonitors.map(_.lockString)
).toSet
ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName, threadInfo.getThreadState,
stackTrace, if (threadInfo.getLockOwnerId < 0) None else Some(threadInfo.getLockOwnerId),
Option(threadInfo.getLockInfo).map(_.lockString).getOrElse(""), heldLocks.toSeq)
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment