From ecfb3127670c7f15e3a15e7f51fa578532480cda Mon Sep 17 00:00:00 2001
From: Xiangrui Meng <meng@databricks.com>
Date: Thu, 23 Jul 2015 10:32:11 -0700
Subject: [PATCH] [SPARK-9243] [Documentation] null -> zero in crosstab doc

We forgot to update the crosstab docs: pairs with no occurrences are reported with a count of zero, not `null`. cc brkyvz

Author: Xiangrui Meng <meng@databricks.com>

Closes #7608 from mengxr/SPARK-9243 and squashes the following commits:

0ea3236 [Xiangrui Meng] null -> zero in crosstab doc
---
 R/pkg/R/DataFrame.R                                             | 2 +-
 python/pyspark/sql/dataframe.py                                 | 2 +-
 .../scala/org/apache/spark/sql/DataFrameStatFunctions.scala     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 06dd6b75df..f4c93d3c7d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1566,7 +1566,7 @@ setMethod("fillna",
 #' @return a local R data.frame representing the contingency table. The first column of each row
 #'         will be the distinct values of `col1` and the column names will be the distinct values
 #'         of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no
-#'         occurrences will have `null` as their counts.
+#'         occurrences will have zero as their counts.
 #'
 #' @rdname statfunctions
 #' @export
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 83e02b85f0..d76e051bd7 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1130,7 +1130,7 @@ class DataFrame(object):
         non-zero pair frequencies will be returned.
         The first column of each row will be the distinct values of `col1` and the column names
         will be the distinct values of `col2`. The name of the first column will be `$col1_$col2`.
-        Pairs that have no occurrences will have `null` as their counts.
+        Pairs that have no occurrences will have zero as their counts.
         :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases.
 
         :param col1: The name of the first column. Distinct items will make the first item of
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index 587869e57f..4ec58082e7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -77,7 +77,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
    * pair frequencies will be returned.
    * The first column of each row will be the distinct values of `col1` and the column names will
    * be the distinct values of `col2`. The name of the first column will be `$col1_$col2`. Counts
-   * will be returned as `Long`s. Pairs that have no occurrences will have `null` as their counts.
+   * will be returned as `Long`s. Pairs that have no occurrences will have zero as their counts.
    * Null elements will be replaced by "null", and back ticks will be dropped from elements if they
    * exist.
    *
-- 
GitLab