From 454ba4d67e782369627dfe60261e6648a27b91a0 Mon Sep 17 00:00:00 2001
From: Sun Rui <sunrui2016@gmail.com>
Date: Sun, 8 May 2016 00:17:36 -0700
Subject: [PATCH] [SPARK-12479][SPARKR] sparkR collect on GroupedData throws R
 error "missing value where TRUE/FALSE needed"

## What changes were proposed in this pull request?

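This PR works around NA values that appear during hash code computation: `mult31AndAdd` now replaces NA entries with 0 before summing them, which avoids the R error "missing value where TRUE/FALSE needed".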

This PR is submitted on behalf of paulomagalhaes, whose original PR is https://github.com/apache/spark/pull/10436

## How was this patch tested?
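SparkR unit tests, including a new `hashCode` test that checks that hashing a 32-character hex string raises no error.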

Author: Sun Rui <sunrui2016@gmail.com>
Author: ray <ray@rays-MacBook-Air.local>

Closes #12976 from sun-rui/SPARK-12479.
---
 R/pkg/R/utils.R                        | 3 +++
 R/pkg/inst/tests/testthat/test_utils.R | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index bf67e231d5..784f737180 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -157,8 +157,11 @@ wrapInt <- function(value) {
 
 # Multiply `val` by 31 and add `addVal` to the result. Ensures that
 # integer-overflows are handled at every step.
+#
+# TODO: integer overflow is not handled well here; NA entries in `vec` are set to 0 as a workaround.
 mult31AndAdd <- function(val, addVal) {
   vec <- c(bitwShiftL(val, c(4, 3, 2, 1, 0)), addVal)
+  vec[is.na(vec)] <- 0
   Reduce(function(a, b) {
           wrapInt(as.numeric(a) + as.numeric(b))
          },
diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 01694ab5c4..54d2eca50e 100644
--- a/R/pkg/inst/tests/testthat/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
@@ -164,3 +164,7 @@ test_that("convertToJSaveMode", {
   expect_error(convertToJSaveMode("foo"),
     'mode should be one of "append", "overwrite", "error", "ignore"') #nolint
 })
+
+test_that("hashCode", {
+  expect_error(hashCode("bc53d3605e8a5b7de1e8e271c2317645"), NA)
+})
-- 
GitLab