Skip to content
Snippets Groups Projects
Commit 676f9828 authored by Reynold Xin's avatar Reynold Xin
Browse files

[SPARK-2953] Allow using short names for io compression codecs

Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is easier for users if Spark just accepts "lz4", "lzf", "snappy".

Author: Reynold Xin <rxin@apache.org>

Closes #1873 from rxin/compressionCodecShortForm and squashes the following commits:

9f50962 [Reynold Xin] Specify short-form compression codec names first.
63f78ee [Reynold Xin] Updated configuration documentation.
47b3848 [Reynold Xin] [SPARK-2953] Allow using short names for io compression codecs
parent c235b83e
No related branches found
No related tags found
No related merge requests found
......@@ -46,17 +46,24 @@ trait CompressionCodec {
private[spark] object CompressionCodec {

  // Short, user-friendly aliases for the built-in codec implementations.
  // Users may set "spark.io.compression.codec" to either one of these keys
  // or to a fully qualified class name.
  private val shortCompressionCodecNames = Map(
    "lz4" -> classOf[LZ4CompressionCodec].getName,
    "lzf" -> classOf[LZFCompressionCodec].getName,
    "snappy" -> classOf[SnappyCompressionCodec].getName)

  /** Creates the codec configured by "spark.io.compression.codec" (default: snappy). */
  def createCodec(conf: SparkConf): CompressionCodec = {
    createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
  }

  /**
   * Creates a codec from either a short alias ("lz4", "lzf", "snappy") or a fully
   * qualified class name. The class must expose a constructor taking a SparkConf.
   */
  def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
    // Locale.ROOT avoids locale-sensitive lowercasing (e.g. Turkish dotless i)
    // breaking the alias lookup; unknown names fall through as class names.
    val codecClass =
      shortCompressionCodecNames.getOrElse(codecName.toLowerCase(java.util.Locale.ROOT), codecName)
    val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
      .getConstructor(classOf[SparkConf])
    ctor.newInstance(conf).asInstanceOf[CompressionCodec]
  }

  val DEFAULT_COMPRESSION_CODEC = "snappy"
}
......
......@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
testCodec(codec)
}
test("lz4 compression codec short form") {
  // The "lz4" alias must resolve to the LZ4 implementation and round-trip data.
  val lz4Codec = CompressionCodec.createCodec(conf, "lz4")
  assert(lz4Codec.getClass === classOf[LZ4CompressionCodec])
  testCodec(lz4Codec)
}
test("lzf compression codec") {
  // Creating the codec by fully qualified class name must yield the LZF codec.
  val lzfClassName = classOf[LZFCompressionCodec].getName
  val lzfCodec = CompressionCodec.createCodec(conf, lzfClassName)
  assert(lzfCodec.getClass === classOf[LZFCompressionCodec])
  testCodec(lzfCodec)
}
test("lzf compression codec short form") {
  // The "lzf" alias must resolve to the LZF implementation and round-trip data.
  val lzfCodec = CompressionCodec.createCodec(conf, "lzf")
  assert(lzfCodec.getClass === classOf[LZFCompressionCodec])
  testCodec(lzfCodec)
}
test("snappy compression codec") {
  // Creating the codec by fully qualified class name must yield the Snappy codec.
  val snappyClassName = classOf[SnappyCompressionCodec].getName
  val snappyCodec = CompressionCodec.createCodec(conf, snappyClassName)
  assert(snappyCodec.getClass === classOf[SnappyCompressionCodec])
  testCodec(snappyCodec)
}
test("snappy compression codec short form") {
  // The "snappy" alias must resolve to the Snappy implementation and round-trip data.
  val snappyCodec = CompressionCodec.createCodec(conf, "snappy")
  assert(snappyCodec.getClass === classOf[SnappyCompressionCodec])
  testCodec(snappyCodec)
}
}
......@@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td><code>spark.io.compression.codec</code></td>
<td>snappy</td>
<td>
  The codec used to compress internal data such as RDD partitions and shuffle outputs. By default,
  Spark provides three codecs: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>. You
  can also use fully qualified class names to specify the codec, e.g.
  <code>org.apache.spark.io.LZ4CompressionCodec</code>,
  <code>org.apache.spark.io.LZFCompressionCodec</code>,
  and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
</td>
......
0% loaded. Loading, or an error occurred.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment