Skip to content
Snippets Groups Projects
Commit a5719804 authored by Marcelo Vanzin's avatar Marcelo Vanzin Committed by Andrew Or
Browse files

[SPARK-11071] [LAUNCHER] Fix flakiness in LauncherServerSuite::timeout.

The test could fail depending on scheduling of the various threads
involved; the change removes some sources of races, while making the
test a little more resilient by trying a few times before giving up.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #9079 from vanzin/SPARK-11071.
parent b591de7c
No related branches found
No related tags found
No related merge requests found
...@@ -242,7 +242,14 @@ class LauncherServer implements Closeable { ...@@ -242,7 +242,14 @@ class LauncherServer implements Closeable {
synchronized (clients) { synchronized (clients) {
clients.add(clientConnection); clients.add(clientConnection);
} }
timeoutTimer.schedule(timeout, getConnectionTimeout()); long timeoutMs = getConnectionTimeout();
// 0 is used for testing to avoid issues with clock resolution / thread scheduling,
// and force an immediate timeout.
if (timeoutMs > 0) {
timeoutTimer.schedule(timeout, getConnectionTimeout());
} else {
timeout.run();
}
} }
} }
} catch (IOException ioe) { } catch (IOException ioe) {
......
...@@ -121,12 +121,12 @@ public class LauncherServerSuite extends BaseSuite { ...@@ -121,12 +121,12 @@ public class LauncherServerSuite extends BaseSuite {
@Test @Test
public void testTimeout() throws Exception { public void testTimeout() throws Exception {
final long TEST_TIMEOUT = 10L;
ChildProcAppHandle handle = null; ChildProcAppHandle handle = null;
TestClient client = null; TestClient client = null;
try { try {
SparkLauncher.setConfig(SparkLauncher.CHILD_CONNECTION_TIMEOUT, String.valueOf(TEST_TIMEOUT)); // LauncherServer will immediately close the server-side socket when the timeout is set
// to 0.
SparkLauncher.setConfig(SparkLauncher.CHILD_CONNECTION_TIMEOUT, "0");
handle = LauncherServer.newAppHandle(); handle = LauncherServer.newAppHandle();
...@@ -134,12 +134,29 @@ public class LauncherServerSuite extends BaseSuite { ...@@ -134,12 +134,29 @@ public class LauncherServerSuite extends BaseSuite {
LauncherServer.getServerInstance().getPort()); LauncherServer.getServerInstance().getPort());
client = new TestClient(s); client = new TestClient(s);
Thread.sleep(TEST_TIMEOUT * 10); // Try a few times since the client-side socket may not reflect the server-side close
try { // immediately.
client.send(new Hello(handle.getSecret(), "1.4.0")); boolean helloSent = false;
fail("Expected exception caused by connection timeout."); int maxTries = 10;
} catch (IllegalStateException e) { for (int i = 0; i < maxTries; i++) {
// Expected. try {
if (!helloSent) {
client.send(new Hello(handle.getSecret(), "1.4.0"));
helloSent = true;
} else {
client.send(new SetAppId("appId"));
}
fail("Expected exception caused by connection timeout.");
} catch (IllegalStateException | IOException e) {
// Expected.
break;
} catch (AssertionError e) {
if (i < maxTries - 1) {
Thread.sleep(100);
} else {
throw new AssertionError("Test failed after " + maxTries + " attempts.", e);
}
}
} }
} finally { } finally {
SparkLauncher.launcherConfig.remove(SparkLauncher.CHILD_CONNECTION_TIMEOUT); SparkLauncher.launcherConfig.remove(SparkLauncher.CHILD_CONNECTION_TIMEOUT);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment