Skip to content
Snippets Groups Projects
Commit abfedb9c authored by Christian Kadner, committed by Michael Armbrust
Browse files

[SPARK-9211] [SQL] [TEST] normalize line separators before generating MD5 hash

The golden answer file names for the existing Hive comparison tests were generated using an MD5 hash of the query text, which uses Unix-style line separator characters `\n` (LF).
This PR ensures that all occurrences of the Windows-style line separator `\r\n` (CRLF) are replaced with `\n` (LF) before generating the MD5 hash, so that golden answer file names generated on Windows get an identical MD5 hash.

Author: Christian Kadner <ckadner@us.ibm.com>

Closes #7563 from ckadner/SPARK-9211_working and squashes the following commits:

d541db0 [Christian Kadner] [SPARK-9211][SQL] normalize line separators before MD5 hash
parent 54c0789a
No related branches found
No related tags found
No related merge requests found
...@@ -124,7 +124,7 @@ abstract class HiveComparisonTest ...@@ -124,7 +124,7 @@ abstract class HiveComparisonTest
protected val cacheDigest = java.security.MessageDigest.getInstance("MD5") protected val cacheDigest = java.security.MessageDigest.getInstance("MD5")
protected def getMd5(str: String): String = { protected def getMd5(str: String): String = {
val digest = java.security.MessageDigest.getInstance("MD5") val digest = java.security.MessageDigest.getInstance("MD5")
digest.update(str.getBytes("utf-8")) digest.update(str.replaceAll(System.lineSeparator(), "\n").getBytes("utf-8"))
new java.math.BigInteger(1, digest.digest).toString(16) new java.math.BigInteger(1, digest.digest).toString(16)
} }
......
...@@ -427,7 +427,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { ...@@ -427,7 +427,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
|USING 'cat' AS (tKey, tValue) ROW FORMAT SERDE |USING 'cat' AS (tKey, tValue) ROW FORMAT SERDE
|'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' FROM src; |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' FROM src;
""".stripMargin.replaceAll("\n", " ")) """.stripMargin.replaceAll(System.lineSeparator(), " "))
test("transform with SerDe2") { test("transform with SerDe2") {
...@@ -446,7 +446,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { ...@@ -446,7 +446,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|('avro.schema.literal'='{"namespace": "testing.hive.avro.serde","name": |('avro.schema.literal'='{"namespace": "testing.hive.avro.serde","name":
|"src","type": "record","fields": [{"name":"key","type":"int"}]}') |"src","type": "record","fields": [{"name":"key","type":"int"}]}')
|FROM small_src |FROM small_src
""".stripMargin.replaceAll("\n", " ")).collect().head """.stripMargin.replaceAll(System.lineSeparator(), " ")).collect().head
assert(expected(0) === res(0)) assert(expected(0) === res(0))
} }
...@@ -458,7 +458,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { ...@@ -458,7 +458,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|('serialization.last.column.takes.rest'='true') USING 'cat' AS (tKey, tValue) |('serialization.last.column.takes.rest'='true') USING 'cat' AS (tKey, tValue)
|ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
|WITH SERDEPROPERTIES ('serialization.last.column.takes.rest'='true') FROM src; |WITH SERDEPROPERTIES ('serialization.last.column.takes.rest'='true') FROM src;
""".stripMargin.replaceAll("\n", " ")) """.stripMargin.replaceAll(System.lineSeparator(), " "))
createQueryTest("transform with SerDe4", createQueryTest("transform with SerDe4",
""" """
...@@ -467,7 +467,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { ...@@ -467,7 +467,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|('serialization.last.column.takes.rest'='true') USING 'cat' ROW FORMAT SERDE |('serialization.last.column.takes.rest'='true') USING 'cat' ROW FORMAT SERDE
|'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES
|('serialization.last.column.takes.rest'='true') FROM src; |('serialization.last.column.takes.rest'='true') FROM src;
""".stripMargin.replaceAll("\n", " ")) """.stripMargin.replaceAll(System.lineSeparator(), " "))
createQueryTest("LIKE", createQueryTest("LIKE",
"SELECT * FROM src WHERE value LIKE '%1%'") "SELECT * FROM src WHERE value LIKE '%1%'")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment