Skip to content
Snippets Groups Projects
Commit 676f9828 authored by Reynold Xin's avatar Reynold Xin
Browse files

[SPARK-2953] Allow using short names for io compression codecs

Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is easier for users if Spark just accepts "lz4", "lzf", "snappy".

Author: Reynold Xin <rxin@apache.org>

Closes #1873 from rxin/compressionCodecShortForm and squashes the following commits:

9f50962 [Reynold Xin] Specify short-form compression codec names first.
63f78ee [Reynold Xin] Updated configuration documentation.
47b3848 [Reynold Xin] [SPARK-2953] Allow using short names for io compression codecs
parent c235b83e
No related branches found
No related tags found
No related merge requests found
...@@ -46,17 +46,24 @@ trait CompressionCodec { ...@@ -46,17 +46,24 @@ trait CompressionCodec {
private[spark] object CompressionCodec {

  // Maps user-friendly short names ("lz4", "lzf", "snappy") to the fully
  // qualified codec class names that Class.forName can load.
  private val shortCompressionCodecNames = Map(
    "lz4" -> classOf[LZ4CompressionCodec].getName,
    "lzf" -> classOf[LZFCompressionCodec].getName,
    "snappy" -> classOf[SnappyCompressionCodec].getName)

  /**
   * Creates the codec configured by "spark.io.compression.codec", falling back to
   * [[DEFAULT_COMPRESSION_CODEC]] when the setting is absent.
   */
  def createCodec(conf: SparkConf): CompressionCodec = {
    createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
  }

  /**
   * Creates a codec from either a short name ("lz4", "lzf", "snappy", matched
   * case-insensitively) or a fully qualified codec class name.
   *
   * The resolved class must have a public constructor taking a single SparkConf
   * argument; otherwise reflection throws (NoSuchMethodException) at call time.
   */
  def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
    // Use Locale.ROOT so the short-name lookup is not affected by locale-specific
    // case mappings (e.g. the Turkish dotless-i rule); unknown names fall through
    // unchanged and are treated as fully qualified class names.
    val codecClass =
      shortCompressionCodecNames.getOrElse(codecName.toLowerCase(java.util.Locale.ROOT), codecName)
    val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
      .getConstructor(classOf[SparkConf])
    ctor.newInstance(conf).asInstanceOf[CompressionCodec]
  }

  val DEFAULT_COMPRESSION_CODEC = "snappy"
}
......
...@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite { ...@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
testCodec(codec) testCodec(codec)
} }
test("lz4 compression codec short form") {
  // The short name "lz4" must resolve to the LZ4 implementation class.
  val lz4Codec = CompressionCodec.createCodec(conf, "lz4")
  assert(lz4Codec.getClass === classOf[LZ4CompressionCodec])
  testCodec(lz4Codec)
}
test("lzf compression codec") {
  // Passing the fully qualified class name must yield an LZF codec instance.
  val lzfCodec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
  assert(lzfCodec.getClass === classOf[LZFCompressionCodec])
  testCodec(lzfCodec)
}
test("lzf compression codec short form") {
  // The short name "lzf" must resolve to the LZF implementation class.
  val lzfCodec = CompressionCodec.createCodec(conf, "lzf")
  assert(lzfCodec.getClass === classOf[LZFCompressionCodec])
  testCodec(lzfCodec)
}
test("snappy compression codec") {
  // Passing the fully qualified class name must yield a Snappy codec instance.
  val snappyCodec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
  assert(snappyCodec.getClass === classOf[SnappyCompressionCodec])
  testCodec(snappyCodec)
}
test("snappy compression codec short form") {
  // The short name "snappy" must resolve to the Snappy implementation class.
  val snappyCodec = CompressionCodec.createCodec(conf, "snappy")
  assert(snappyCodec.getClass === classOf[SnappyCompressionCodec])
  testCodec(snappyCodec)
}
} }
...@@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may be useful ...@@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may be useful
</tr> </tr>
<tr>
  <td><code>spark.io.compression.codec</code></td>
  <td>snappy</td>
  <td>
    The codec used to compress internal data such as RDD partitions and shuffle outputs. By default,
    Spark provides three codecs: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>. You
    can also use fully qualified class names to specify the codec, e.g.
    <code>org.apache.spark.io.LZ4CompressionCodec</code>,
    <code>org.apache.spark.io.LZFCompressionCodec</code>,
    and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
  </td>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment