Skip to content
Snippets Groups Projects
Unverified Commit 28b645b1 authored by invkrh's avatar invkrh Committed by Sean Owen
Browse files

[SPARK-17855][CORE] Remove query string from jar url

## What changes were proposed in this pull request?

Spark-submit support jar url with http protocol. However, if the url contains any query strings, `worker.DriverRunner.downloadUserJar()` method will throw "Did not see expected jar" exception. This is because this method checks the existance of a downloaded jar whose name contains query strings. This is a problem when your jar is located on some web service which requires some additional information to retrieve the file.

This pr just removes query strings before checking jar existance on worker.

## How was this patch tested?

For now, you can only test this patch by manual test.
* Deploy a spark cluster locally
* Make sure apache httpd service is on
* Save an uber jar, e.g spark-job.jar under `/var/www/html/`
* Use http://localhost/spark-job.jar?param=1 as jar url when running `spark-submit`
* Job should be launched

Author: invkrh <invkrh@gmail.com>

Closes #15420 from invkrh/spark-17855.
parent c8b612de
No related branches found
No related tags found
No related merge requests found
......@@ -18,12 +18,12 @@
package org.apache.spark.deploy.worker
import java.io._
import java.net.URI
import java.nio.charset.StandardCharsets
import scala.collection.JavaConverters._
import com.google.common.io.Files
import org.apache.hadoop.fs.Path
import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.{DriverDescription, SparkHadoopUtil}
......@@ -147,30 +147,24 @@ private[deploy] class DriverRunner(
* Will throw an exception if there are errors downloading the jar.
*/
private def downloadUserJar(driverDir: File): String = {
val jarPath = new Path(driverDesc.jarUrl)
val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
val destPath = new File(driverDir.getAbsolutePath, jarPath.getName)
val jarFileName = jarPath.getName
val jarFileName = new URI(driverDesc.jarUrl).getPath.split("/").last
val localJarFile = new File(driverDir, jarFileName)
val localJarFilename = localJarFile.getAbsolutePath
if (!localJarFile.exists()) { // May already exist if running multiple workers on one node
logInfo(s"Copying user jar $jarPath to $destPath")
logInfo(s"Copying user jar ${driverDesc.jarUrl} to $localJarFile")
Utils.fetchFile(
driverDesc.jarUrl,
driverDir,
conf,
securityManager,
hadoopConf,
SparkHadoopUtil.get.newConfiguration(conf),
System.currentTimeMillis(),
useCache = false)
if (!localJarFile.exists()) { // Verify copy succeeded
throw new IOException(
s"Can not find expected jar $jarFileName which should have been loaded in $driverDir")
}
}
if (!localJarFile.exists()) { // Verify copy succeeded
throw new Exception(s"Did not see expected jar $jarFileName in $driverDir")
}
localJarFilename
localJarFile.getAbsolutePath
}
private[worker] def prepareAndRunDriver(): Int = {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment