From e096a652b92fc64a7b3457cd0766ab324bcc980b Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@databricks.com>
Date: Thu, 17 Dec 2015 14:16:49 -0800
Subject: [PATCH] [SPARK-12397][SQL] Improve error messages for data sources
 when they are not found

Point users to spark-packages.org to find them.

Author: Reynold Xin <rxin@databricks.com>

Closes #10351 from rxin/SPARK-12397.
---
 .../datasources/ResolvedDataSource.scala      | 50 ++++++++++++-------
 .../sql/sources/ResolvedDataSourceSuite.scala | 17 +++++++
 2 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ResolvedDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ResolvedDataSource.scala
index 86a306b8f9..e02ee6cd6b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ResolvedDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ResolvedDataSource.scala
@@ -57,24 +57,38 @@ object ResolvedDataSource extends Logging {
     val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], loader)
 
     serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList match {
-      /** the provider format did not match any given registered aliases */
-      case Nil => Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match {
-        case Success(dataSource) => dataSource
-        case Failure(error) =>
-          if (provider.startsWith("org.apache.spark.sql.hive.orc")) {
-            throw new ClassNotFoundException(
-              "The ORC data source must be used with Hive support enabled.", error)
-          } else {
-            throw new ClassNotFoundException(
-              s"Failed to load class for data source: $provider.", error)
-          }
-      }
-      /** there is exactly one registered alias */
-      case head :: Nil => head.getClass
-      /** There are multiple registered aliases for the input */
-      case sources => sys.error(s"Multiple sources found for $provider, " +
-        s"(${sources.map(_.getClass.getName).mkString(", ")}), " +
-        "please specify the fully qualified class name.")
+      // the provider format did not match any given registered aliases
+      case Nil =>
+        Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match {
+          case Success(dataSource) =>
+            // Found the data source using fully qualified path
+            dataSource
+          case Failure(error) =>
+            if (provider.startsWith("org.apache.spark.sql.hive.orc")) {
+              throw new ClassNotFoundException(
+                "The ORC data source must be used with Hive support enabled.", error)
+            } else {
+              if (provider == "avro" || provider == "com.databricks.spark.avro") {
+                throw new ClassNotFoundException(
+                  s"Failed to find data source: $provider. Please use Spark package " +
+                  "http://spark-packages.org/package/databricks/spark-avro",
+                  error)
+              } else {
+                throw new ClassNotFoundException(
+                  s"Failed to find data source: $provider. Please find packages at " +
+                  "http://spark-packages.org",
+                  error)
+              }
+            }
+        }
+      case head :: Nil =>
+        // there is exactly one registered alias
+        head.getClass
+      case sources =>
+        // There are multiple registered aliases for the input
+        sys.error(s"Multiple sources found for $provider " +
+          s"(${sources.map(_.getClass.getName).mkString(", ")}), " +
+          "please specify the fully qualified class name.")
     }
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala
index 27d1cd92fc..cb6e5179b3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala
@@ -57,4 +57,21 @@ class ResolvedDataSourceSuite extends SparkFunSuite {
       ResolvedDataSource.lookupDataSource("org.apache.spark.sql.parquet") ===
         classOf[org.apache.spark.sql.execution.datasources.parquet.DefaultSource])
   }
+
+  test("error message for unknown data sources") {
+    val error1 = intercept[ClassNotFoundException] {
+      ResolvedDataSource.lookupDataSource("avro")
+    }
+    assert(error1.getMessage.contains("spark-packages"))
+
+    val error2 = intercept[ClassNotFoundException] {
+      ResolvedDataSource.lookupDataSource("com.databricks.spark.avro")
+    }
+    assert(error2.getMessage.contains("spark-packages"))
+
+    val error3 = intercept[ClassNotFoundException] {
+      ResolvedDataSource.lookupDataSource("asfdwefasdfasdf")
+    }
+    assert(error3.getMessage.contains("spark-packages"))
+  }
 }
-- 
GitLab