Skip to content
Snippets Groups Projects
Commit 0ac47083 authored by Reynold Xin
Browse files

[SPARK-8146] DataFrame Python API: Alias replace in df.na

Author: Reynold Xin <rxin@databricks.com>

Closes #6688 from rxin/df-alias-replace and squashes the following commits:

774c19c [Reynold Xin] [SPARK-8146] DataFrame Python API: Alias replace in DataFrameNaFunctions.
parent 26d07f1e
No related branches found
No related tags found
No related merge requests found
......@@ -909,8 +909,7 @@ class DataFrame(object):
@since("1.3.1")
def dropna(self, how='any', thresh=None, subset=None):
"""Returns a new :class:`DataFrame` omitting rows with null values.
This is an alias for ``na.drop()``.
:func:`DataFrame.dropna` and :func:`DataFrameNaFunctions.drop` are aliases of each other.
:param how: 'any' or 'all'.
If 'any', drop a row if it contains any nulls.
......@@ -920,13 +919,6 @@ class DataFrame(object):
This overwrites the `how` parameter.
:param subset: optional list of column names to consider.
>>> df4.dropna().show()
+---+------+-----+
|age|height| name|
+---+------+-----+
| 10| 80|Alice|
+---+------+-----+
>>> df4.na.drop().show()
+---+------+-----+
|age|height| name|
......@@ -952,6 +944,7 @@ class DataFrame(object):
@since("1.3.1")
def fillna(self, value, subset=None):
"""Replace null values, alias for ``na.fill()``.
:func:`DataFrame.fillna` and :func:`DataFrameNaFunctions.fill` are aliases of each other.
:param value: int, long, float, string, or dict.
Value to replace null values with.
......@@ -963,7 +956,7 @@ class DataFrame(object):
For example, if `value` is a string, and subset contains a non-string column,
then the non-string column is simply ignored.
>>> df4.fillna(50).show()
>>> df4.na.fill(50).show()
+---+------+-----+
|age|height| name|
+---+------+-----+
......@@ -973,16 +966,6 @@ class DataFrame(object):
| 50| 50| null|
+---+------+-----+
>>> df4.fillna({'age': 50, 'name': 'unknown'}).show()
+---+------+-------+
|age|height| name|
+---+------+-------+
| 10| 80| Alice|
| 5| null| Bob|
| 50| null| Tom|
| 50| null|unknown|
+---+------+-------+
>>> df4.na.fill({'age': 50, 'name': 'unknown'}).show()
+---+------+-------+
|age|height| name|
......@@ -1014,6 +997,8 @@ class DataFrame(object):
@since(1.4)
def replace(self, to_replace, value, subset=None):
"""Returns a new :class:`DataFrame` replacing a value with another value.
:func:`DataFrame.replace` and :func:`DataFrameNaFunctions.replace` are
aliases of each other.
:param to_replace: int, long, float, string, or list.
Value to be replaced.
......@@ -1029,7 +1014,7 @@ class DataFrame(object):
For example, if `value` is a string, and subset contains a non-string column,
then the non-string column is simply ignored.
>>> df4.replace(10, 20).show()
>>> df4.na.replace(10, 20).show()
+----+------+-----+
| age|height| name|
+----+------+-----+
......@@ -1039,7 +1024,7 @@ class DataFrame(object):
|null| null| null|
+----+------+-----+
>>> df4.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
>>> df4.na.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
+----+------+----+
| age|height|name|
+----+------+----+
......@@ -1090,9 +1075,9 @@ class DataFrame(object):
@since(1.4)
def corr(self, col1, col2, method=None):
"""
Calculates the correlation of two columns of a DataFrame as a double value. Currently only
supports the Pearson Correlation Coefficient.
:func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases.
Calculates the correlation of two columns of a DataFrame as a double value.
Currently only supports the Pearson Correlation Coefficient.
:func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases of each other.
:param col1: The name of the first column
:param col2: The name of the second column
......@@ -1241,7 +1226,10 @@ class DataFrame(object):
import pandas as pd
return pd.DataFrame.from_records(self.collect(), columns=self.columns)
##########################################################################################
# Pandas compatibility
##########################################################################################
groupby = groupBy
drop_duplicates = dropDuplicates
......@@ -1261,6 +1249,8 @@ def _to_scala_map(sc, jm):
class DataFrameNaFunctions(object):
"""Functionality for working with missing data in :class:`DataFrame`.
.. versionadded:: 1.4
"""
def __init__(self, df):
......@@ -1276,9 +1266,16 @@ class DataFrameNaFunctions(object):
fill.__doc__ = DataFrame.fillna.__doc__
def replace(self, to_replace, value, subset=None):
return self.df.replace(to_replace, value, subset)
replace.__doc__ = DataFrame.replace.__doc__
class DataFrameStatFunctions(object):
"""Functionality for statistic functions with :class:`DataFrame`.
.. versionadded:: 1.4
"""
def __init__(self, df):
......
......@@ -32,7 +32,6 @@ def _to_java_cols(cols):
class Window(object):
"""
Utility functions for defining window in DataFrames.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment