Skip to content
Snippets Groups Projects
Commit 268b71d0 authored by Shixiong Zhu, committed by Yin Huai
Browse files

[SPARK-17065][SQL] Improve the error message when encountering an incompatible DataSourceRegister

## What changes were proposed in this pull request?

Extend the error message with an instruction asking the user to remove or upgrade the library that provides the incompatible DataSourceRegister.

## How was this patch tested?

Test command:
```
build/sbt -Dscala-2.10 package
SPARK_SCALA_VERSION=2.10 bin/spark-shell --packages ai.h2o:sparkling-water-core_2.10:1.6.5

scala> Seq(1).toDS().write.format("parquet").save("foo")
```

Before:
```
java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider org.apache.spark.h2o.DefaultSource could not be instantiated
	at java.util.ServiceLoader.fail(ServiceLoader.java:232)
	at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
	at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
...
Caused by: java.lang.NoClassDefFoundError: org/apache/spark/Logging
	at java.lang.ClassLoader.defineClass1(Native Method)
	at java.lang.ClassLoader.defineClass(ClassLoader.java:760)
	at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
	at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
	at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
	at java.security.AccessController.doPrivileged(Native Method)
...
```

After:

```
java.lang.ClassNotFoundException: Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: org.apache.spark.sql.sources.DataSourceRegister: Provider org.apache.spark.h2o.DefaultSource could not be instantiated
	at org.apache.spark.sql.execution.datasources.DataSource.lookupDataSource(DataSource.scala:178)
	at org.apache.spark.sql.execution.datasources.DataSource.providingClass$lzycompute(DataSource.scala:79)
	at org.apache.spark.sql.execution.datasources.DataSource.providingClass(DataSource.scala:79)
	at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:441)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:213)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:196)
...
```

Author: Shixiong Zhu <shixiong@databricks.com>

Closes #14651 from zsxwing/SPARK-17065.
parent fffb0c0d
No related branches found
No related tags found
No related merge requests found
......@@ -17,7 +17,7 @@
package org.apache.spark.sql.execution.datasources
import java.util.ServiceLoader
import java.util.{ServiceConfigurationError, ServiceLoader}
import scala.collection.JavaConverters._
import scala.language.{existentials, implicitConversions}
......@@ -124,50 +124,63 @@ case class DataSource(
val loader = Utils.getContextOrSparkClassLoader
val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], loader)
serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList match {
// the provider format did not match any given registered aliases
case Nil =>
try {
Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match {
case Success(dataSource) =>
// Found the data source using fully qualified path
dataSource
case Failure(error) =>
if (provider.toLowerCase == "orc" ||
try {
serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList match {
// the provider format did not match any given registered aliases
case Nil =>
try {
Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match {
case Success(dataSource) =>
// Found the data source using fully qualified path
dataSource
case Failure(error) =>
if (provider.toLowerCase == "orc" ||
provider.startsWith("org.apache.spark.sql.hive.orc")) {
throw new AnalysisException(
"The ORC data source must be used with Hive support enabled")
} else if (provider.toLowerCase == "avro" ||
throw new AnalysisException(
"The ORC data source must be used with Hive support enabled")
} else if (provider.toLowerCase == "avro" ||
provider == "com.databricks.spark.avro") {
throw new AnalysisException(
s"Failed to find data source: ${provider.toLowerCase}. Please use Spark " +
"package http://spark-packages.org/package/databricks/spark-avro")
throw new AnalysisException(
s"Failed to find data source: ${provider.toLowerCase}. Please use Spark " +
"package http://spark-packages.org/package/databricks/spark-avro")
} else {
throw new ClassNotFoundException(
s"Failed to find data source: $provider. Please find packages at " +
"http://spark-packages.org",
error)
}
}
} catch {
case e: NoClassDefFoundError => // This one won't be caught by Scala NonFatal
// NoClassDefFoundError's class name uses "/" rather than "." for packages
val className = e.getMessage.replaceAll("/", ".")
if (spark2RemovedClasses.contains(className)) {
throw new ClassNotFoundException(s"$className was removed in Spark 2.0. " +
"Please check if your library is compatible with Spark 2.0", e)
} else {
throw new ClassNotFoundException(
s"Failed to find data source: $provider. Please find packages at " +
"http://spark-packages.org",
error)
throw e
}
}
} catch {
case e: NoClassDefFoundError => // This one won't be caught by Scala NonFatal
// NoClassDefFoundError's class name uses "/" rather than "." for packages
val className = e.getMessage.replaceAll("/", ".")
if (spark2RemovedClasses.contains(className)) {
throw new ClassNotFoundException(s"$className was removed in Spark 2.0. " +
"Please check if your library is compatible with Spark 2.0", e)
} else {
throw e
}
case head :: Nil =>
// there is exactly one registered alias
head.getClass
case sources =>
// There are multiple registered aliases for the input
sys.error(s"Multiple sources found for $provider " +
s"(${sources.map(_.getClass.getName).mkString(", ")}), " +
"please specify the fully qualified class name.")
}
} catch {
case e: ServiceConfigurationError if e.getCause.isInstanceOf[NoClassDefFoundError] =>
// NoClassDefFoundError's class name uses "/" rather than "." for packages
val className = e.getCause.getMessage.replaceAll("/", ".")
if (spark2RemovedClasses.contains(className)) {
throw new ClassNotFoundException(s"Detected an incompatible DataSourceRegister. " +
"Please remove the incompatible library from classpath or upgrade it. " +
s"Error: ${e.getMessage}", e)
} else {
throw e
}
case head :: Nil =>
// there is exactly one registered alias
head.getClass
case sources =>
// There are multiple registered aliases for the input
sys.error(s"Multiple sources found for $provider " +
s"(${sources.map(_.getClass.getName).mkString(", ")}), " +
"please specify the fully qualified class name.")
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment