From bc36b0f1a1f38060700903be7bf5cf012b414551 Mon Sep 17 00:00:00 2001
From: Wenchen Fan <wenchen@databricks.com>
Date: Sun, 17 Jan 2016 11:02:37 -0800
Subject: [PATCH] [SQL] [MINOR] speed up hashcode for UTF8String

similar to https://github.com/apache/spark/pull/10784, use `Murmur3_x86_32.hashUnsafeBytes` instead.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #10791 from cloud-fan/string-hashcode.
---
 .../java/org/apache/spark/unsafe/types/UTF8String.java     | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 5b61386808..87706d0b68 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -31,6 +31,7 @@ import com.esotericsoftware.kryo.io.Output;
 
 import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
+import org.apache.spark.unsafe.hash.Murmur3_x86_32;
 
 import static org.apache.spark.unsafe.Platform.*;
 
@@ -935,11 +936,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
 
   @Override
   public int hashCode() {
-    int result = 1;
-    for (int i = 0; i < numBytes; i ++) {
-      result = 31 * result + getByte(i);
-    }
-    return result;
+    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, 42);
   }
 
   /**
-- 
GitLab