Skip to content
Snippets Groups Projects
Commit 464a3c1e authored by Josh Rosen
Browse files

[SPARK-14435][BUILD] Shade Kryo in our custom Hive 1.2.1 fork

This patch updates our custom Hive 1.2.1 fork in order to shade Kryo in Hive. Shading Kryo in Hive is a prerequisite for upgrading Spark to use Kryo 3 (see #12076).

The source for this new fork of Hive can be found at https://github.com/JoshRosen/hive/tree/release-1.2.1-spark2

Here's the complete diff from the official Hive 1.2.1 release: https://github.com/apache/hive/compare/release-1.2.1...JoshRosen:release-1.2.1-spark2

Here's the diff from the sources that pwendell used to publish the current `1.2.1.spark` release of Hive: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2. This diff looks large because his branch used a shell script to rewrite the groupId, whereas I had to commit the groupId changes in order to prevent the find-and-replace from affecting the package names in our relocated Kryo classes: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2#diff-6ada9aaec70e069df8f2c34c5519dd1e

Using these changes, I was able to publish a local version of Hive and verify that this change fixes the test failures which are blocking #12076. Note that this PR will not compile until we complete the review of the Hive POM changes and stage and publish a release.

/cc vanzin, steveloughran, and pwendell for review.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #12215 from JoshRosen/shade-kryo-in-hive.
parent f8c9beca
No related branches found
No related tags found
No related merge requests found
......@@ -131,7 +131,7 @@
<curator.version>2.4.0</curator.version>
<hive.group>org.spark-project.hive</hive.group>
<!-- Version used in Maven Hive dependency -->
<hive.version>1.2.1.spark</hive.version>
<hive.version>1.2.1.spark2</hive.version>
<!-- Version used for internal directory structure -->
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.10.1.1</derby.version>
......
......@@ -24,8 +24,6 @@ import scala.collection.JavaConverters._
import scala.language.implicitConversions
import scala.reflect.ClassTag
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.google.common.base.Objects
import org.apache.avro.Schema
import org.apache.hadoop.conf.Configuration
......@@ -37,6 +35,8 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils
import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils}
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector
import org.apache.hadoop.io.Writable
import org.apache.hive.com.esotericsoftware.kryo.Kryo
import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.types.Decimal
......
......@@ -24,9 +24,7 @@ import org.apache.spark.SparkFunSuite
/**
* Verify that some classes load and that others are not found on the classpath.
*
*
* This is used to detect classpath and shading conflict, especially between
* Spark's required Kryo version and that which can be found in some Hive versions.
* This is used to detect classpath and shading conflicts.
*/
class ClasspathDependenciesSuite extends SparkFunSuite {
private val classloader = this.getClass.getClassLoader
......@@ -40,10 +38,6 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
classloader.loadClass(classname)
}
private def assertLoads(classes: String*): Unit = {
classes.foreach(assertLoads)
}
private def findResource(classname: String): URL = {
val resource = resourceName(classname)
classloader.getResource(resource)
......@@ -63,17 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
}
}
private def assertClassNotFound(classes: String*): Unit = {
classes.foreach(assertClassNotFound)
test("shaded Protobuf") {
assertLoads("org.apache.hive.com.google.protobuf.ServiceException")
}
private val KRYO = "com.esotericsoftware.kryo.Kryo"
private val SPARK_HIVE = "org.apache.hive."
private val SPARK_SHADED = "org.spark-project.hive.shaded."
test("shaded Protobuf") {
assertLoads(SPARK_SHADED + "com.google.protobuf.ServiceException")
test("shaded Kryo") {
assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo")
}
test("hive-common") {
......@@ -86,25 +75,13 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
private val STD_INSTANTIATOR = "org.objenesis.strategy.StdInstantiatorStrategy"
test("unshaded kryo") {
assertLoads(KRYO, STD_INSTANTIATOR)
}
test("Forbidden Dependencies") {
assertClassNotFound(
SPARK_HIVE + KRYO,
SPARK_SHADED + KRYO,
"org.apache.hive." + KRYO,
"com.esotericsoftware.shaded." + STD_INSTANTIATOR,
SPARK_HIVE + "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
"org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR
)
assertClassNotFound("com.esotericsoftware.shaded." + STD_INSTANTIATOR)
assertClassNotFound("org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR)
}
test("parquet-hadoop-bundle") {
assertLoads(
"parquet.hadoop.ParquetOutputFormat",
"parquet.hadoop.ParquetInputFormat"
)
assertLoads("parquet.hadoop.ParquetOutputFormat")
assertLoads("parquet.hadoop.ParquetInputFormat")
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment