From 8fd55358b7fc1c7545d823bef7b39769f731c1ee Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Tue, 12 May 2015 18:37:02 -0700 Subject: [PATCH] [SPARK-7588] Document all SQL/DataFrame public methods with @since tag This pull request adds since tag to all public methods/classes in SQL/DataFrame to indicate which version the methods/classes were first added. Author: Reynold Xin <rxin@databricks.com> Closes #6101 from rxin/tbc and squashes the following commits: ed55e11 [Reynold Xin] Add since version to all DataFrame methods. --- .../java/org/apache/spark/sql/SaveMode.java | 10 ++ .../scala/org/apache/spark/sql/Column.scala | 145 ++++++++++++++++-- .../org/apache/spark/sql/DataFrame.scala | 103 +++++++++++++ .../apache/spark/sql/DataFrameHolder.scala | 2 + .../spark/sql/DataFrameNaFunctions.scala | 44 ++++++ .../spark/sql/DataFrameStatFunctions.scala | 18 +++ .../spark/sql/ExperimentalMethods.scala | 4 + .../org/apache/spark/sql/GroupedData.scala | 22 +++ .../apache/spark/sql/JavaTypeInference.scala | 3 +- .../org/apache/spark/sql/SQLContext.scala | 94 +++++++++++- .../org/apache/spark/sql/SparkSQLParser.scala | 1 - .../apache/spark/sql/UDFRegistration.scala | 49 ++++++ .../spark/sql/UserDefinedFunction.scala | 4 + .../org/apache/spark/sql/functions.scala | 140 ++++++++++++++++- .../apache/spark/sql/sources/filters.scala | 30 ++++ .../apache/spark/sql/sources/interfaces.scala | 57 +++++++ .../apache/spark/sql/hive/HiveContext.scala | 6 + 17 files changed, 706 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java index a40be526d0..9665c3c46f 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java +++ b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java @@ -18,28 +18,38 @@ package org.apache.spark.sql; /** * SaveMode is used to specify the expected behavior of saving a DataFrame to a data source. + * + * @since 1.3.0 */ public enum SaveMode { /** * Append mode means that when saving a DataFrame to a data source, if data/table already exists, * contents of the DataFrame are expected to be appended to existing data. + * + * @since 1.3.0 */ Append, /** * Overwrite mode means that when saving a DataFrame to a data source, * if data/table already exists, existing data is expected to be overwritten by the contents of * the DataFrame. + * + * @since 1.3.0 */ Overwrite, /** * ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists, * an exception is expected to be thrown. + * + * @since 1.3.0 */ ErrorIfExists, /** * Ignore mode means that when saving a DataFrame to a data source, if data already exists, * the save operation is expected to not save the contents of the DataFrame and to not * change the existing data. + * + * @since 1.3.0 */ Ignore } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 4d50821620..4773dedf72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -45,6 +45,8 @@ private[sql] object Column { * @groupname expr_ops Expression operators. * @groupname df_ops DataFrame functions. * @groupname Ungrouped Support functions for DataFrames. 
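For orientation, a minimal usage sketch of the SaveMode values annotated above, as they are passed to the DataFrame save/saveAsTable overloads documented later in this patch; `df`, the output path, and the table name are hypothetical:

{{{
import org.apache.spark.sql.SaveMode

// Append to existing data at the path instead of failing when it already exists.
df.save("/tmp/events", "parquet", SaveMode.Append)

// Replace the contents of a (hypothetical) metastore table.
df.saveAsTable("events_snapshot", "parquet", SaveMode.Overwrite)
}}}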
+ * + * @since 1.3.0 */ @Experimental class Column(protected[sql] val expr: Expression) extends Logging { @@ -77,6 +79,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * of every struct in that array, and return an Array of fields * * @group expr_ops + * @since 1.4.0 */ def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr) @@ -92,6 +95,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_- : Column = UnaryMinus(expr) @@ -107,6 +111,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_! : Column = Not(expr) @@ -122,6 +127,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def === (other: Any): Column = { val right = lit(other).expr @@ -145,6 +151,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def equalTo(other: Any): Column = this === other @@ -161,6 +168,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def !== (other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -177,6 +185,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def notEqual(other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -192,6 +201,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def > (other: Any): Column = GreaterThan(expr, lit(other).expr) @@ -207,6 +217,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def gt(other: Any): Column = this > other @@ -221,6 +232,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def < (other: Any): Column = LessThan(expr, lit(other).expr) @@ -235,6 +247,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def lt(other: Any): Column = this < other @@ -249,6 +262,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def <= (other: Any): Column = LessThanOrEqual(expr, lit(other).expr) @@ -263,6 +277,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def leq(other: Any): Column = this <= other @@ -277,6 +292,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def >= (other: Any): Column = GreaterThanOrEqual(expr, lit(other).expr) @@ -291,6 +307,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def geq(other: Any): Column = this >= other @@ -298,6 +315,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group expr_ops + * @since 1.3.0 */ def <=> (other: Any): Column = EqualNullSafe(expr, lit(other).expr) @@ -305,6 +323,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. 
* * @group java_expr_ops + * @since 1.3.0 */ def eqNullSafe(other: Any): Column = this <=> other @@ -312,6 +331,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current column is between the lower bound and upper bound, inclusive. * * @group java_expr_ops + * @since 1.4.0 */ def between(lowerBound: Any, upperBound: Any): Column = { (this >= lowerBound) && (this <= upperBound) @@ -321,6 +341,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is null. * * @group expr_ops + * @since 1.3.0 */ def isNull: Column = IsNull(expr) @@ -328,6 +349,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is NOT null. * * @group expr_ops + * @since 1.3.0 */ def isNotNull: Column = IsNotNull(expr) @@ -342,6 +364,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def || (other: Any): Column = Or(expr, lit(other).expr) @@ -356,6 +379,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def or(other: Column): Column = this || other @@ -370,6 +394,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def && (other: Any): Column = And(expr, lit(other).expr) @@ -384,6 +409,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def and(other: Column): Column = this && other @@ -398,6 +424,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def + (other: Any): Column = Add(expr, lit(other).expr) @@ -412,6 +439,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def plus(other: Any): Column = this + other @@ -426,6 +454,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def - (other: Any): Column = Subtract(expr, lit(other).expr) @@ -440,6 +469,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def minus(other: Any): Column = this - other @@ -454,6 +484,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def * (other: Any): Column = Multiply(expr, lit(other).expr) @@ -468,6 +499,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def multiply(other: Any): Column = this * other @@ -482,6 +514,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def / (other: Any): Column = Divide(expr, lit(other).expr) @@ -496,6 +529,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def divide(other: Any): Column = this / other @@ -503,6 +537,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group expr_ops + * @since 1.3.0 */ def % (other: Any): Column = Remainder(expr, lit(other).expr) @@ -510,6 +545,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. 
* * @group java_expr_ops + * @since 1.3.0 */ def mod(other: Any): Column = this % other @@ -518,6 +554,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * by the evaluated values of the arguments. * * @group expr_ops + * @since 1.3.0 */ @scala.annotation.varargs def in(list: Column*): Column = In(expr, list.map(_.expr)) @@ -526,6 +563,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL like expression. * * @group expr_ops + * @since 1.3.0 */ def like(literal: String): Column = Like(expr, lit(literal).expr) @@ -533,6 +571,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL RLIKE expression (LIKE with Regex). * * @group expr_ops + * @since 1.3.0 */ def rlike(literal: String): Column = RLike(expr, lit(literal).expr) @@ -541,6 +580,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * or gets a value by key `key` in a [[MapType]]. * * @group expr_ops + * @since 1.3.0 */ def getItem(key: Any): Column = UnresolvedExtractValue(expr, Literal(key)) @@ -548,6 +588,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * An expression that gets a field by name in a [[StructType]]. * * @group expr_ops + * @since 1.3.0 */ def getField(fieldName: String): Column = UnresolvedExtractValue(expr, Literal(fieldName)) @@ -557,6 +598,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len expression for the length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Column, len: Column): Column = Substring(expr, startPos.expr, len.expr) @@ -566,6 +608,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Int, len: Int): Column = Substring(expr, lit(startPos).expr, lit(len).expr) @@ -573,6 +616,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Contains the other element. * * @group expr_ops + * @since 1.3.0 */ def contains(other: Any): Column = Contains(expr, lit(other).expr) @@ -580,6 +624,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with. * * @group expr_ops + * @since 1.3.0 */ def startsWith(other: Column): Column = StartsWith(expr, lit(other).expr) @@ -587,6 +632,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with another string literal. * * @group expr_ops + * @since 1.3.0 */ def startsWith(literal: String): Column = this.startsWith(lit(literal)) @@ -594,6 +640,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with. * * @group expr_ops + * @since 1.3.0 */ def endsWith(other: Column): Column = EndsWith(expr, lit(other).expr) @@ -601,6 +648,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with another string literal. 
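A brief sketch exercising several of the Column operators documented above; `people` is a hypothetical DataFrame with `age`, `name`, `nickname`, and `height` columns:

{{{
// Scala operator syntax.
people.filter(people("age") > 21 && people("name").startsWith("A"))
people.select(people("height") * 2.54, people("name").substr(0, 3))
people.filter(people("nickname").isNull || people("nickname") === "n/a")

// The same predicate using the Java-friendly method names.
people.filter(people.col("age").gt(21).and(people.col("name").startsWith("A")))
}}}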
* * @group expr_ops + * @since 1.3.0 */ def endsWith(literal: String): Column = this.endsWith(lit(literal)) @@ -612,6 +660,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String): Column = Alias(expr, alias)() @@ -623,6 +672,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: Symbol): Column = Alias(expr, alias.name)() @@ -634,6 +684,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String, metadata: Metadata): Column = { Alias(expr, alias)(explicitMetadata = Some(metadata)) @@ -651,6 +702,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: DataType): Column = expr match { // Lift alias out of cast so we can support col.as("name").cast(IntegerType) @@ -668,6 +720,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: String): Column = cast(DataTypeParser.parse(to)) @@ -682,6 +735,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def desc: Column = SortOrder(expr, Descending) @@ -696,6 +750,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def asc: Column = SortOrder(expr, Ascending) @@ -703,6 +758,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Prints the expression to the console for debugging purpose. * * @group df_ops + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { if (extended) { @@ -719,6 +775,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr) @@ -729,6 +786,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr) @@ -739,6 +797,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr) @@ -748,61 +807,115 @@ class Column(protected[sql] val expr: Expression) extends Logging { /** * :: Experimental :: * A convenient class used for constructing schema. + * + * @since 1.3.0 */ @Experimental class ColumnName(name: String) extends Column(name) { - /** Creates a new AttributeReference of type boolean */ + /** + * Creates a new [[StructField]] of type boolean. + * @since 1.3.0 + */ def boolean: StructField = StructField(name, BooleanType) - /** Creates a new AttributeReference of type byte */ + /** + * Creates a new [[StructField]] of type byte. + * @since 1.3.0 + */ def byte: StructField = StructField(name, ByteType) - /** Creates a new AttributeReference of type short */ + /** + * Creates a new [[StructField]] of type short. + * @since 1.3.0 + */ def short: StructField = StructField(name, ShortType) - /** Creates a new AttributeReference of type int */ + /** + * Creates a new [[StructField]] of type int. + * @since 1.3.0 + */ def int: StructField = StructField(name, IntegerType) - /** Creates a new AttributeReference of type long */ + /** + * Creates a new [[StructField]] of type long. 
+ * @since 1.3.0 + */ def long: StructField = StructField(name, LongType) - /** Creates a new AttributeReference of type float */ + /** + * Creates a new [[StructField]] of type float. + * @since 1.3.0 + */ def float: StructField = StructField(name, FloatType) - /** Creates a new AttributeReference of type double */ + /** + * Creates a new [[StructField]] of type double. + * @since 1.3.0 + */ def double: StructField = StructField(name, DoubleType) - /** Creates a new AttributeReference of type string */ + /** + * Creates a new [[StructField]] of type string. + * @since 1.3.0 + */ def string: StructField = StructField(name, StringType) - /** Creates a new AttributeReference of type date */ + /** + * Creates a new [[StructField]] of type date. + * @since 1.3.0 + */ def date: StructField = StructField(name, DateType) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal: StructField = StructField(name, DecimalType.Unlimited) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal(precision: Int, scale: Int): StructField = StructField(name, DecimalType(precision, scale)) - /** Creates a new AttributeReference of type timestamp */ + /** + * Creates a new [[StructField]] of type timestamp. + * @since 1.3.0 + */ def timestamp: StructField = StructField(name, TimestampType) - /** Creates a new AttributeReference of type binary */ + /** + * Creates a new [[StructField]] of type binary. + * @since 1.3.0 + */ def binary: StructField = StructField(name, BinaryType) - /** Creates a new AttributeReference of type array */ + /** + * Creates a new [[StructField]] of type array. + * @since 1.3.0 + */ def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType)) - /** Creates a new AttributeReference of type map */ + /** + * Creates a new [[StructField]] of type map. + * @since 1.3.0 + */ def map(keyType: DataType, valueType: DataType): StructField = map(MapType(keyType, valueType)) def map(mapType: MapType): StructField = StructField(name, mapType) - /** Creates a new AttributeReference of type struct */ + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(fields: StructField*): StructField = struct(StructType(fields)) + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(structType: StructType): StructField = StructField(name, structType) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 01fd432cc8..c820a67357 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -114,6 +114,7 @@ private[sql] object DataFrame { * @groupname rdd RDD Operations * @groupname output Output Operations * @groupname action Actions + * @since 1.3.0 */ // TODO: Improve documentation. @Experimental @@ -233,6 +234,7 @@ class DataFrame private[sql]( /** * Returns the object itself. * @group basic + * @since 1.3.0 */ // This is declared with parentheses to prevent the Scala compiler from treating // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame. 
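A minimal sketch of the ColumnName StructField builders documented above, assuming a SQLContext named `sqlContext` so that `$"..."` yields a ColumnName via its implicits:

{{{
import org.apache.spark.sql.types._
import sqlContext.implicits._  // brings the $"..." StringToColumn conversion into scope

// Assemble a schema from ColumnName helpers instead of spelling out each StructField.
val schema = StructType(Seq(
  $"id".long,
  $"name".string,
  $"score".decimal(10, 2),
  $"tags".array(StringType)
))
}}}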
@@ -247,6 +249,7 @@ class DataFrame private[sql]( * rdd.toDF("id", "name") // this creates a DataFrame with column name "id" and "name" * }}} * @group basic + * @since 1.3.0 */ @scala.annotation.varargs def toDF(colNames: String*): DataFrame = { @@ -264,12 +267,14 @@ class DataFrame private[sql]( /** * Returns the schema of this [[DataFrame]]. * @group basic + * @since 1.3.0 */ def schema: StructType = queryExecution.analyzed.schema /** * Returns all column names and their data types as an array. * @group basic + * @since 1.3.0 */ def dtypes: Array[(String, String)] = schema.fields.map { field => (field.name, field.dataType.toString) @@ -278,18 +283,21 @@ class DataFrame private[sql]( /** * Returns all column names as an array. * @group basic + * @since 1.3.0 */ def columns: Array[String] = schema.fields.map(_.name) /** * Prints the schema to the console in a nice tree format. * @group basic + * @since 1.3.0 */ def printSchema(): Unit = println(schema.treeString) /** * Prints the plans (logical and physical) to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { ExplainCommand( @@ -302,6 +310,7 @@ class DataFrame private[sql]( /** * Only prints the physical plan to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(): Unit = explain(extended = false) @@ -309,6 +318,7 @@ class DataFrame private[sql]( * Returns true if the `collect` and `take` methods can be run locally * (without any Spark executors). * @group basic + * @since 1.3.0 */ def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation] @@ -325,12 +335,14 @@ class DataFrame private[sql]( * @param numRows Number of rows to show * * @group action + * @since 1.3.0 */ def show(numRows: Int): Unit = println(showString(numRows)) /** * Displays the top 20 rows of [[DataFrame]] in a tabular form. * @group action + * @since 1.3.0 */ def show(): Unit = show(20) @@ -342,6 +354,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.3.1 */ def na: DataFrameNaFunctions = new DataFrameNaFunctions(this) @@ -353,6 +366,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.4.0 */ def stat: DataFrameStatFunctions = new DataFrameStatFunctions(this) @@ -363,6 +377,7 @@ class DataFrame private[sql]( * * @param right Right side of the join operation. * @group dfops + * @since 1.3.0 */ def join(right: DataFrame): DataFrame = { Join(logicalPlan, right.logicalPlan, joinType = Inner, None) @@ -386,6 +401,7 @@ class DataFrame private[sql]( * @param right Right side of the join operation. * @param usingColumn Name of the column to join on. This column must exist on both sides. * @group dfops + * @since 1.4.0 */ def join(right: DataFrame, usingColumn: String): DataFrame = { // Analyze the self join. The assumption is that the analyzer will disambiguate left vs right @@ -416,6 +432,7 @@ class DataFrame private[sql]( * df1.join(df2).where($"df1Key" === $"df2Key") * }}} * @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column): DataFrame = join(right, joinExprs, "inner") @@ -437,6 +454,7 @@ class DataFrame private[sql]( * @param joinExprs Join expression. * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. 
* @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = { // Note that in this function, we introduce a hack in the case of self-join to automatically @@ -483,6 +501,7 @@ class DataFrame private[sql]( * df.sort($"sortcol".asc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortCol: String, sortCols: String*): DataFrame = { @@ -495,6 +514,7 @@ class DataFrame private[sql]( * df.sort($"col1", $"col2".desc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortExprs: Column*): DataFrame = { @@ -513,6 +533,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def orderBy(sortCol: String, sortCols: String*): DataFrame = sort(sortCol, sortCols :_*) @@ -521,6 +542,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs :_*) @@ -528,12 +550,14 @@ class DataFrame private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def apply(colName: String): Column = col(colName) /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def col(colName: String): Column = colName match { case "*" => @@ -546,12 +570,14 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] with an alias set. * @group dfops + * @since 1.3.0 */ def as(alias: String): DataFrame = Subquery(alias, logicalPlan) /** * (Scala-specific) Returns a new [[DataFrame]] with an alias set. 
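A brief sketch of the join and sort variants documented above; `df1`, `df2`, and the column names are hypothetical, and `sqlContext.implicits._` is assumed to be in scope for the `$"..."` syntax:

{{{
// 1.4.0: inner equi-join on a shared column name, then order the result.
df1.join(df2, "userId")
  .sort($"amount".desc, $"userId".asc)

// 1.3.0: explicit join expression plus a join type.
df1.join(df2, df1("userId") === df2("userId"), "left_outer")
  .orderBy(df1("userId"))
}}}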
* @group dfops + * @since 1.3.0 */ def as(alias: Symbol): DataFrame = as(alias.name) @@ -561,6 +587,7 @@ class DataFrame private[sql]( * df.select($"colA", $"colB" + 1) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(cols: Column*): DataFrame = { @@ -583,6 +610,7 @@ class DataFrame private[sql]( * df.select($"colA", $"colB") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)) :_*) @@ -595,6 +623,7 @@ class DataFrame private[sql]( * df.selectExpr("colA", "colB as newName", "abs(colC)") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def selectExpr(exprs: String*): DataFrame = { @@ -612,6 +641,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def filter(condition: Column): DataFrame = Filter(condition.expr, logicalPlan) @@ -621,6 +651,7 @@ class DataFrame private[sql]( * peopleDf.filter("age > 15") * }}} * @group dfops + * @since 1.3.0 */ def filter(conditionExpr: String): DataFrame = { filter(Column(new SqlParser().parseExpression(conditionExpr))) @@ -635,6 +666,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def where(condition: Column): DataFrame = filter(condition) @@ -653,6 +685,7 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) @@ -675,6 +708,7 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { @@ -690,6 +724,7 @@ class DataFrame private[sql]( * df.groupBy().agg("age" -> "max", "salary" -> "avg") * }}} * @group dfops + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { groupBy().agg(aggExpr, aggExprs :_*) @@ -703,6 +738,7 @@ class DataFrame private[sql]( * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs) @@ -714,6 +750,7 @@ class DataFrame private[sql]( * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs) @@ -725,6 +762,7 @@ class DataFrame private[sql]( * df.groupBy().agg(max($"age"), avg($"salary")) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs :_*) @@ -733,6 +771,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] by taking the first `n` rows. The difference between this function * and `head` is that `head` returns an array while `limit` returns a new [[DataFrame]]. * @group dfops + * @since 1.3.0 */ def limit(n: Int): DataFrame = Limit(Literal(n), logicalPlan) @@ -740,6 +779,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing union of rows in this frame and another frame. * This is equivalent to `UNION ALL` in SQL. * @group dfops + * @since 1.3.0 */ def unionAll(other: DataFrame): DataFrame = Union(logicalPlan, other.logicalPlan) @@ -747,6 +787,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows only in both this frame and another frame. * This is equivalent to `INTERSECT` in SQL. 
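A minimal sketch combining the projection, filtering, and aggregation methods documented above; `df` and the column names are hypothetical:

{{{
import org.apache.spark.sql.functions.{avg, max}

df.filter(df("age") > 15)
  .groupBy("department")
  .agg(max("age"), avg("expense"))

// Equivalent using a string predicate and (column -> aggregate) pairs.
df.filter("age > 15")
  .groupBy("department")
  .agg("age" -> "max", "expense" -> "avg")
}}}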
* @group dfops + * @since 1.3.0 */ def intersect(other: DataFrame): DataFrame = Intersect(logicalPlan, other.logicalPlan) @@ -754,6 +795,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows in this frame but not in another frame. * This is equivalent to `EXCEPT` in SQL. * @group dfops + * @since 1.3.0 */ def except(other: DataFrame): DataFrame = Except(logicalPlan, other.logicalPlan) @@ -764,6 +806,7 @@ class DataFrame private[sql]( * @param fraction Fraction of rows to generate. * @param seed Seed for sampling. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): DataFrame = { Sample(0.0, fraction, withReplacement, seed, logicalPlan) @@ -775,6 +818,7 @@ class DataFrame private[sql]( * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double): DataFrame = { sample(withReplacement, fraction, Utils.random.nextLong) @@ -786,6 +830,7 @@ class DataFrame private[sql]( * @param weights weights for splits, will be normalized if they don't sum to 1. * @param seed Seed for sampling. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double], seed: Long): Array[DataFrame] = { val sum = weights.sum @@ -800,6 +845,7 @@ class DataFrame private[sql]( * * @param weights weights for splits, will be normalized if they don't sum to 1. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double]): Array[DataFrame] = { randomSplit(weights, Utils.random.nextLong) @@ -836,6 +882,7 @@ class DataFrame private[sql]( * val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = { val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] @@ -860,6 +907,7 @@ class DataFrame private[sql]( * df.explode("words", "word")(words: String => words.split(" ")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B]) : DataFrame = { @@ -883,6 +931,7 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] by adding a column. * @group dfops + * @since 1.3.0 */ def withColumn(colName: String, col: Column): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -902,6 +951,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column renamed. * This is a no-op if schema doesn't contain existingName. * @group dfops + * @since 1.3.0 */ def withColumnRenamed(existingName: String, newName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -921,6 +971,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column dropped. * This is a no-op if schema doesn't contain column name. * @group dfops + * @since 1.4.0 */ def drop(colName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -940,6 +991,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * This is an alias for `distinct`. * @group dfops + * @since 1.4.0 */ def dropDuplicates(): DataFrame = dropDuplicates(this.columns) @@ -948,6 +1000,7 @@ class DataFrame private[sql]( * the subset of columns. 
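A brief sketch of the column-manipulation, de-duplication, and splitting methods documented above; `df` and the column names are hypothetical:

{{{
import org.apache.spark.sql.functions.lit

val cleaned = df
  .withColumn("source", lit("batch"))          // add a constant column
  .withColumnRenamed("ts", "eventTime")        // no-op if "ts" is absent
  .dropDuplicates(Seq("userId", "eventTime"))  // 1.4.0: de-duplicate on a subset of columns

// 1.4.0: split into train/test DataFrames with a fixed seed for reproducibility.
val Array(train, test) = cleaned.randomSplit(Array(0.8, 0.2), 42L)
}}}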
* * @group dfops + * @since 1.4.0 */ def dropDuplicates(colNames: Seq[String]): DataFrame = { val groupCols = colNames.map(resolve) @@ -967,6 +1020,7 @@ class DataFrame private[sql]( * the subset of columns. * * @group dfops + * @since 1.4.0 */ def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq) @@ -991,6 +1045,7 @@ class DataFrame private[sql]( * }}} * * @group action + * @since 1.3.1 */ @scala.annotation.varargs def describe(cols: String*): DataFrame = { @@ -1034,24 +1089,28 @@ class DataFrame private[sql]( /** * Returns the first `n` rows. * @group action + * @since 1.3.0 */ def head(n: Int): Array[Row] = limit(n).collect() /** * Returns the first row. * @group action + * @since 1.3.0 */ def head(): Row = head(1).head /** * Returns the first row. Alias for head(). * @group action + * @since 1.3.0 */ override def first(): Row = head() /** * Returns a new RDD by applying a function to all rows of this DataFrame. * @group rdd + * @since 1.3.0 */ override def map[R: ClassTag](f: Row => R): RDD[R] = rdd.map(f) @@ -1059,12 +1118,14 @@ class DataFrame private[sql]( * Returns a new RDD by first applying a function to all rows of this [[DataFrame]], * and then flattening the results. * @group rdd + * @since 1.3.0 */ override def flatMap[R: ClassTag](f: Row => TraversableOnce[R]): RDD[R] = rdd.flatMap(f) /** * Returns a new RDD by applying a function to each partition of this DataFrame. * @group rdd + * @since 1.3.0 */ override def mapPartitions[R: ClassTag](f: Iterator[Row] => Iterator[R]): RDD[R] = { rdd.mapPartitions(f) @@ -1073,42 +1134,49 @@ class DataFrame private[sql]( /** * Applies a function `f` to all rows. * @group rdd + * @since 1.3.0 */ override def foreach(f: Row => Unit): Unit = rdd.foreach(f) /** * Applies a function f to each partition of this [[DataFrame]]. * @group rdd + * @since 1.3.0 */ override def foreachPartition(f: Iterator[Row] => Unit): Unit = rdd.foreachPartition(f) /** * Returns the first `n` rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def take(n: Int): Array[Row] = head(n) /** * Returns an array that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collect(): Array[Row] = queryExecution.executedPlan.executeCollect() /** * Returns a Java list that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collectAsList(): java.util.List[Row] = java.util.Arrays.asList(rdd.collect() :_*) /** * Returns the number of rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def count(): Long = groupBy().count().collect().head.getLong(0) /** * Returns a new [[DataFrame]] that has exactly `numPartitions` partitions. * @group rdd + * @since 1.3.0 */ override def repartition(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = true, logicalPlan) @@ -1120,6 +1188,7 @@ class DataFrame private[sql]( * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * @group rdd + * @since 1.4.0 */ override def coalesce(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = false, logicalPlan) @@ -1129,11 +1198,13 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * This is an alias for `dropDuplicates`. 
* @group dfops + * @since 1.3.0 */ override def distinct: DataFrame = Distinct(logicalPlan) /** * @group basic + * @since 1.3.0 */ override def persist(): this.type = { sqlContext.cacheManager.cacheQuery(this) @@ -1142,11 +1213,13 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def cache(): this.type = persist() /** * @group basic + * @since 1.3.0 */ override def persist(newLevel: StorageLevel): this.type = { sqlContext.cacheManager.cacheQuery(this, None, newLevel) @@ -1155,6 +1228,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(blocking: Boolean): this.type = { sqlContext.cacheManager.tryUncacheQuery(this, blocking) @@ -1163,6 +1237,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(): this.type = unpersist(blocking = false) @@ -1175,6 +1250,7 @@ class DataFrame private[sql]( * memoized. Once called, it won't change even if you change any query planning related Spark SQL * configurations (e.g. `spark.sql.shuffle.partitions`). * @group rdd + * @since 1.3.0 */ lazy val rdd: RDD[Row] = { // use a local variable to make sure the map closure doesn't capture the whole DataFrame @@ -1188,12 +1264,14 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def toJavaRDD: JavaRDD[Row] = rdd.toJavaRDD() /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def javaRDD: JavaRDD[Row] = toJavaRDD @@ -1202,6 +1280,7 @@ class DataFrame private[sql]( * temporary table is tied to the [[SQLContext]] that was used to create this DataFrame. * * @group basic + * @since 1.3.0 */ def registerTempTable(tableName: String): Unit = { sqlContext.registerDataFrameAsTable(this, tableName) @@ -1212,6 +1291,7 @@ class DataFrame private[sql]( * Files that are written out using this method can be read back in as a [[DataFrame]] * using the `parquetFile` function in [[SQLContext]]. * @group output + * @since 1.3.0 */ def saveAsParquetFile(path: String): Unit = { if (sqlContext.conf.parquetUseDataSourceApi) { @@ -1235,6 +1315,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String): Unit = { @@ -1254,6 +1335,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, mode: SaveMode): Unit = { @@ -1281,6 +1363,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, source: String): Unit = { @@ -1300,6 +1383,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. 
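A minimal sketch of the caching and temp-table registration methods documented above; `sqlContext`, `df`, and the table name are hypothetical, and `sqlContext.sql` (not shown in this diff) is used to query the registered table:

{{{
// Register the DataFrame so it can be queried with SQL through the owning SQLContext.
df.registerTempTable("events")

// Cache the underlying query result, use it, then release it without blocking.
df.cache()
val recent = sqlContext.sql("SELECT * FROM events WHERE age > 15")
df.unpersist()
}}}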
* @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = { @@ -1319,6 +1403,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable( @@ -1340,6 +1425,7 @@ class DataFrame private[sql]( * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. * @group output + * @since 1.4.0 */ @Experimental def saveAsTable( @@ -1365,6 +1451,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable( @@ -1396,6 +1483,7 @@ class DataFrame private[sql]( * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. * @group output + * @since 1.4.0 */ @Experimental def saveAsTable( @@ -1421,6 +1509,7 @@ class DataFrame private[sql]( * using the default data source configured by spark.sql.sources.default and * [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String): Unit = { @@ -1432,6 +1521,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode, * using the default data source configured by spark.sql.sources.default. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, mode: SaveMode): Unit = { @@ -1444,6 +1534,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * using [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, source: String): Unit = { @@ -1455,6 +1546,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source and * [[SaveMode]] specified by mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, source: String, mode: SaveMode): Unit = { @@ -1466,6 +1558,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options. * @group output + * @since 1.3.0 */ @Experimental def save( @@ -1480,6 +1573,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. * @group output + * @since 1.4.0 */ @Experimental def save( @@ -1496,6 +1590,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options * @group output + * @since 1.3.0 */ @Experimental def save( @@ -1510,6 +1605,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. 
* @group output + * @since 1.4.0 */ @Experimental def save( @@ -1524,6 +1620,7 @@ class DataFrame private[sql]( * :: Experimental :: * Adds the rows from this RDD to the specified table, optionally overwriting the existing data. * @group output + * @since 1.3.0 */ @Experimental def insertInto(tableName: String, overwrite: Boolean): Unit = { @@ -1536,6 +1633,7 @@ class DataFrame private[sql]( * Adds the rows from this RDD to the specified table. * Throws an exception if the table already exists. * @group output + * @since 1.3.0 */ @Experimental def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false) @@ -1543,6 +1641,7 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a RDD of JSON strings. * @group rdd + * @since 1.3.0 */ def toJSON: RDD[String] = { val rowSchema = this.schema @@ -1581,6 +1680,7 @@ class DataFrame private[sql]( * given name; if you pass `false`, it will throw if the table already * exists. * @group output + * @since 1.3.0 */ def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { createJDBCTable(url, table, allowExisting, new Properties()) @@ -1594,6 +1694,7 @@ class DataFrame private[sql]( * given name; if you pass `false`, it will throw if the table already * exists. * @group output + * @since 1.4.0 */ def createJDBCTable( url: String, @@ -1626,6 +1727,7 @@ class DataFrame private[sql]( * the RDD in order via the simple statement * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. * @group output + * @since 1.3.0 */ def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { insertIntoJDBC(url, table, overwrite, new Properties()) @@ -1643,6 +1745,7 @@ class DataFrame private[sql]( * the RDD in order via the simple statement * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. * @group output + * @since 1.4.0 */ def insertIntoJDBC( url: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala index a3187fe323..b87efb58d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql /** * A container for a [[DataFrame]], used for implicit conversions. + * + * @since 1.3.0 */ private[sql] case class DataFrameHolder(df: DataFrame) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 4a54120ba8..b4c2daa055 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -30,12 +30,16 @@ import org.apache.spark.sql.types._ /** * :: Experimental :: * Functionality for working with missing data in [[DataFrame]]s. + * + * @since 1.3.1 */ @Experimental final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing any null values. + * + * @since 1.3.1 */ def drop(): DataFrame = drop("any", df.columns) @@ -44,18 +48,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values. * If `how` is "all", then drop rows only if every column is null for that row. 
+ * + * @since 1.3.1 */ def drop(how: String): DataFrame = drop(how, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Array[String]): DataFrame = drop(cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame ]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Seq[String]): DataFrame = drop(cols.size, cols) @@ -65,6 +75,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq) @@ -74,6 +86,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Seq[String]): DataFrame = { how.toLowerCase match { @@ -85,18 +99,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null values. + * + * @since 1.3.1 */ def drop(minNonNulls: Int): DataFrame = drop(minNonNulls, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null * values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing less than * `minNonNulls` non-null values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Seq[String]): DataFrame = { // Filtering condition -- only keep the row if it has at least `minNonNulls` non-null values. @@ -106,23 +126,31 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in numeric columns with `value`. + * + * @since 1.3.1 */ def fill(value: Double): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame ]] that replaces null values in string columns with `value`. + * + * @since 1.3.1 */ def fill(value: String): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame]] that replaces null values in specified numeric columns. * If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in specified * numeric columns. If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -140,12 +168,16 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in specified string columns. * If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in * specified string columns. 
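A brief sketch of the DataFrameNaFunctions documented above; `df` and the column names are hypothetical:

{{{
// Drop rows where both listed columns are null, then require at least 2 non-null values per row.
val pruned = df.na.drop("all", Seq("age", "height")).na.drop(2)

// Fill numeric and string columns separately.
val filled = pruned.na.fill(0.0, Seq("age", "height"))
  .na.fill("unknown", Seq("name"))
}}}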
If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -172,6 +204,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0)); * }}} + * + * @since 1.3.1 */ def fill(valueMap: java.util.Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -189,6 +223,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * "B" -> 1.0 * )) * }}} + * + * @since 1.3.1 */ def fill(valueMap: Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -212,6 +248,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: java.util.Map[T, T]): DataFrame = { replace[T](col, replacement.toMap : Map[T, T]) @@ -233,6 +271,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Array[String], replacement: java.util.Map[T, T]): DataFrame = { replace(cols.toSeq, replacement.toMap) @@ -256,6 +296,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: Map[T, T]): DataFrame = { if (col == "*") { @@ -279,6 +321,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Seq[String], replacement: Map[T, T]): DataFrame = replace0(cols, replacement) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index a1e74470af..5d106c1ac2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -23,6 +23,8 @@ import org.apache.spark.sql.execution.stat._ /** * :: Experimental :: * Statistic functions for [[DataFrame]]s. + * + * @since 1.4.0 */ @Experimental final class DataFrameStatFunctions private[sql](df: DataFrame) { @@ -32,6 +34,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the first column * @param col2 the name of the second column * @return the covariance of the two columns. + * + * @since 1.4.0 */ def cov(col1: String, col2: String): Double = { StatFunctions.calculateCov(df, Seq(col1, col2)) @@ -45,6 +49,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. 
+ * + * @since 1.4.0 */ def corr(col1: String, col2: String, method: String): Double = { require(method == "pearson", "Currently only the calculation of the Pearson Correlation " + @@ -58,6 +64,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. + * + * @since 1.4.0 */ def corr(col1: String, col2: String): Double = { corr(col1, col2, "pearson") @@ -76,6 +84,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col2 The name of the second column. Distinct items will make the column names * of the DataFrame. * @return A DataFrame containing for the contingency table. + * + * @since 1.4.0 */ def crosstab(col1: String, col2: String): DataFrame = { StatFunctions.crossTabulate(df, col1, col2) @@ -91,6 +101,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param support The minimum frequency for an item to be considered `frequent`. Should be greater * than 1e-4. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -104,6 +116,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) @@ -116,6 +130,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -129,6 +145,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala index d5d7e35a6b..717709e4f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala @@ -27,6 +27,8 @@ import org.apache.spark.annotation.Experimental * {{{ * sqlContext.experimental.extraStrategies += ... * }}} + * + * @since 1.3.0 */ @Experimental class ExperimentalMethods protected[sql](sqlContext: SQLContext) { @@ -34,6 +36,8 @@ class ExperimentalMethods protected[sql](sqlContext: SQLContext) { /** * Allows extra strategies to be injected into the query planner at runtime. Note this API * should be consider experimental and is not intended to be stable across releases. 
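A minimal sketch of the DataFrameStatFunctions documented above; `df` and the column names are hypothetical:

{{{
// Pearson correlation and covariance between two numeric columns.
val r   = df.stat.corr("height", "weight")
val cov = df.stat.cov("height", "weight")

// Contingency table of two categorical columns, and (approximate) frequent items with support 0.4.
val pairs    = df.stat.crosstab("department", "gender")
val frequent = df.stat.freqItems(Seq("department"), 0.4)
}}}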
+ * + * @since 1.3.0 */ @Experimental var extraStrategies: Seq[Strategy] = Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 543320e471..1381b9f1a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -30,6 +30,8 @@ import org.apache.spark.sql.types.NumericType /** * :: Experimental :: * A set of methods for aggregations on a [[DataFrame]], created by [[DataFrame.groupBy]]. + * + * @since 1.3.0 */ @Experimental class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { @@ -94,6 +96,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * ) * }}} + * + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { agg((aggExpr +: aggExprs).toMap) @@ -111,6 +115,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * )) * }}} + * + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { exprs.map { case (colName, expr) => @@ -129,6 +135,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * import com.google.common.collect.ImmutableMap; * df.groupBy("department").agg(ImmutableMap.of("age", "max", "expense", "sum")); * }}} + * + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = { agg(exprs.toMap) @@ -162,6 +170,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * // Java, 1.3.x: * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} + * + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { @@ -183,6 +193,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) /** * Count the number of rows for each group. * The resulting [[DataFrame]] will also contain the grouping columns. + * + * @since 1.3.0 */ def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) @@ -190,6 +202,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the average value for each numeric columns for each group. This is an alias for `avg`. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the average values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def mean(colNames: String*): DataFrame = { @@ -200,6 +214,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the max value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the max values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def max(colNames: String*): DataFrame = { @@ -210,6 +226,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the mean value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the mean values for them. 
+ * + * @since 1.3.0 */ @scala.annotation.varargs def avg(colNames: String*): DataFrame = { @@ -220,6 +238,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the min value for each numeric column for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the min values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def min(colNames: String*): DataFrame = { @@ -230,6 +250,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the sum for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the sum for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def sum(colNames: String*): DataFrame = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala index db484c5f50..1ec874f796 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala @@ -21,11 +21,12 @@ import java.beans.Introspector import java.lang.{Iterable => JIterable} import java.util.{Iterator => JIterator, Map => JMap} +import scala.language.existentials + import com.google.common.reflect.TypeToken import org.apache.spark.sql.types._ -import scala.language.existentials /** * Type-inference utilities for POJOs and Java collections. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 70ba8985d6..975498c11f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -95,6 +95,8 @@ private[spark] class DefaultParserDialect extends ParserDialect { * @groupname config Configuration * @groupname dataframes Custom DataFrame Creation * @groupname Ungrouped Support functions for language integrated queries. + * + * @since 1.0.0 */ class SQLContext(@transient val sparkContext: SparkContext) extends org.apache.spark.Logging @@ -113,6 +115,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set Spark SQL configuration properties. * * @group config + * @since 1.0.0 */ def setConf(props: Properties): Unit = conf.setConf(props) @@ -120,6 +123,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set the given Spark SQL configuration property. * * @group config + * @since 1.0.0 */ def setConf(key: String, value: String): Unit = conf.setConf(key, value) @@ -127,6 +131,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Return the value of Spark SQL configuration property for the given key. * * @group config + * @since 1.0.0 */ def getConf(key: String): String = conf.getConf(key) @@ -135,6 +140,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * yet, return `defaultValue`. * * @group config + * @since 1.0.0 */ def getConf(key: String, defaultValue: String): String = conf.getConf(key, defaultValue) @@ -143,6 +149,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * This creates a new copy of the config properties in the form of a Map. 
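A companion sketch for the shorthand per-group aggregates and the SQLContext configuration methods tagged above (again assuming `df` with "department", "age" and "expense", plus a SQLContext named `sqlContext`; the config key is only an example):

{{{
// With no column names, these operate on every numeric column in the group.
df.groupBy("department").avg("age", "expense")
df.groupBy("department").min("age")
df.groupBy("department").sum("expense")

// Configuration methods carried over from 1.0.0.
sqlContext.setConf("spark.sql.shuffle.partitions", "8")
val numShufflePartitions = sqlContext.getConf("spark.sql.shuffle.partitions", "200")
val allConfs = sqlContext.getAllConfs
}}}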
* * @group config + * @since 1.0.0 */ def getAllConfs: immutable.Map[String, String] = conf.getAllConfs @@ -228,6 +235,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the query planner for advanced functionality. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -238,6 +246,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a [[DataFrame]] with no rows or columns. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -270,6 +279,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 * TODO move to SQLSession? */ @transient @@ -278,23 +288,27 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * Returns true if the table is currently cached in-memory. * @group cachemgmt + * @since 1.3.0 */ def isCached(tableName: String): Boolean = cacheManager.isCached(tableName) /** * Caches the specified table in-memory. * @group cachemgmt + * @since 1.3.0 */ def cacheTable(tableName: String): Unit = cacheManager.cacheTable(tableName) /** * Removes the specified table from the in-memory cache. * @group cachemgmt + * @since 1.3.0 */ def uncacheTable(tableName: String): Unit = cacheManager.uncacheTable(tableName) /** * Removes all cached tables from the in-memory cache. + * @since 1.3.0 */ def clearCache(): Unit = cacheManager.clearCache() @@ -311,27 +325,40 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 */ @Experimental object implicits extends Serializable { // scalastyle:on - /** Converts $"col name" into an [[Column]]. */ + /** + * Converts $"col name" into an [[Column]]. + * @since 1.3.0 + */ implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args :_*)) } } - /** An implicit conversion that turns a Scala `Symbol` into a [[Column]]. */ + /** + * An implicit conversion that turns a Scala `Symbol` into a [[Column]]. + * @since 1.3.0 + */ implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name) - /** Creates a DataFrame from an RDD of case classes or tuples. */ + /** + * Creates a DataFrame from an RDD of case classes or tuples. + * @since 1.3.0 + */ implicit def rddToDataFrameHolder[A <: Product : TypeTag](rdd: RDD[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(rdd)) } - /** Creates a DataFrame from a local Seq of Product. */ + /** + * Creates a DataFrame from a local Seq of Product. + * @since 1.3.0 + */ implicit def localSeqToDataFrameHolder[A <: Product : TypeTag](data: Seq[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(data)) @@ -341,7 +368,10 @@ class SQLContext(@transient val sparkContext: SparkContext) // making existing implicit conversions ambiguous. In particular, RDD[Double] is dangerous // because of [[DoubleRDDFunctions]]. - /** Creates a single column DataFrame from an RDD[Int]. */ + /** + * Creates a single column DataFrame from an RDD[Int]. + * @since 1.3.0 + */ implicit def intRddToDataFrameHolder(data: RDD[Int]): DataFrameHolder = { val dataType = IntegerType val rows = data.mapPartitions { iter => @@ -354,7 +384,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[Long]. */ + /** + * Creates a single column DataFrame from an RDD[Long]. 
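The implicits and cache-management methods annotated above can be exercised roughly as follows (a sketch only; it assumes a SparkContext `sc`, a SQLContext `sqlContext`, and a case class `Person` defined at top level):

{{{
import sqlContext.implicits._

case class Person(name: String, age: Int)   // assumed to live at top level

// rddToDataFrameHolder turns an RDD of case classes into a DataFrame via toDF().
val people = sc.parallelize(Seq(Person("Alice", 29), Person("Bob", 31))).toDF()

// StringToColumn lets $"col name" stand in for a Column.
people.where($"age" > 30)

// Cache management (@since 1.3.0).
people.registerTempTable("people")
sqlContext.cacheTable("people")
assert(sqlContext.isCached("people"))
sqlContext.uncacheTable("people")
sqlContext.clearCache()
}}}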
+ * @since 1.3.0 + */ implicit def longRddToDataFrameHolder(data: RDD[Long]): DataFrameHolder = { val dataType = LongType val rows = data.mapPartitions { iter => @@ -367,7 +400,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[String]. */ + /** + * Creates a single column DataFrame from an RDD[String]. + * @since 1.3.0 + */ implicit def stringRddToDataFrameHolder(data: RDD[String]): DataFrameHolder = { val dataType = StringType val rows = data.mapPartitions { iter => @@ -386,6 +422,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from an RDD of case classes. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { @@ -401,6 +438,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from a local Seq of Product. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { @@ -414,6 +452,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]]. * * @group dataframes + * @since 1.3.0 */ def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = { DataFrame(this, LogicalRelation(baseRelation)) @@ -449,6 +488,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { @@ -480,6 +520,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the provided schema. Otherwise, there will be runtime exception. * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { @@ -492,6 +533,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = { val attributeSeq = getSchema(beanClass) @@ -520,6 +562,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { createDataFrame(rdd.rdd, beanClass) @@ -591,6 +634,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * [[DataFrame]] if no paths are passed in. * * @group specificdata + * @since 1.3.0 */ @scala.annotation.varargs def parquetFile(paths: String*): DataFrame = { @@ -609,6 +653,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonFile(path: String): DataFrame = jsonFile(path, 1.0) @@ -618,6 +663,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * returning the result as a [[DataFrame]]. 
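For the createDataFrame and file-reader methods above, a sketch using a programmatic schema (assumptions: `sc`, `sqlContext`, and placeholder paths):

{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

// createDataFrame(rowRDD, schema): the row structure must match the supplied StructType.
val schema = StructType(Seq(
  StructField("name", StringType, nullable = false),
  StructField("age", IntegerType, nullable = false)))
val rows = sc.parallelize(Seq(Row("Alice", 29), Row("Bob", 31)))
val people = sqlContext.createDataFrame(rows, schema)

// Format-specific readers tagged @since 1.3.0.
val parquetDF = sqlContext.parquetFile("path/to/people.parquet")
val jsonDF = sqlContext.jsonFile("path/to/people.json")
}}}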
* * @group specificdata + * @since 1.3.0 */ @Experimental def jsonFile(path: String, schema: StructType): DataFrame = @@ -626,6 +672,7 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * :: Experimental :: * @group specificdata + * @since 1.3.0 */ @Experimental def jsonFile(path: String, samplingRatio: Double): DataFrame = @@ -637,6 +684,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonRDD(json: RDD[String]): DataFrame = jsonRDD(json, 1.0) @@ -647,6 +695,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonRDD(json: JavaRDD[String]): DataFrame = jsonRDD(json.rdd, 1.0) @@ -656,6 +705,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { @@ -678,6 +728,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { @@ -690,6 +741,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { @@ -711,6 +763,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { @@ -723,6 +776,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * using the default data source configured by spark.sql.sources.default. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(path: String): DataFrame = { @@ -735,6 +789,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the dataset stored at path as a DataFrame, using the given data source. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(path: String, source: String): DataFrame = { @@ -747,6 +802,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(source: String, options: java.util.Map[String, String]): DataFrame = { @@ -759,6 +815,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(source: String, options: Map[String, String]): DataFrame = { @@ -772,6 +829,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -787,6 +845,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
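The JSON and generic load() entry points above, sketched under the assumption that `jsonStrings: RDD[String]` and `schema: StructType` are already in scope and that the paths are placeholders:

{{{
// Schema inference scans the data once; the schema overload skips inference.
val inferred = sqlContext.jsonRDD(jsonStrings)
val typed = sqlContext.jsonRDD(jsonStrings, schema)
val sampled = sqlContext.jsonRDD(jsonStrings, 0.1)   // infer from a 10% sample

// Generic data-source loading.
val a = sqlContext.load("path/to/data")                         // uses spark.sql.sources.default
val b = sqlContext.load("path/to/data.json", "json")            // explicit source name
val c = sqlContext.load("json", Map("path" -> "path/to/data.json"))
}}}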
* * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -802,6 +861,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -817,6 +877,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -834,6 +895,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable(tableName: String, path: String): DataFrame = { @@ -847,6 +909,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * and returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -862,6 +925,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -878,6 +942,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -903,6 +968,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -920,6 +986,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -946,6 +1013,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * url named table. * * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc(url: String, table: String): DataFrame = { @@ -958,6 +1026,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * url named table and connection properties. * * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc(url: String, table: String, properties: Properties): DataFrame = { @@ -976,6 +1045,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split * evenly into this many partitions * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc( @@ -1001,6 +1071,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * evenly into this many partitions * @param properties connection properties * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc( @@ -1024,6 +1095,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * of the [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { @@ -1038,6 +1110,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * of the [[DataFrame]]. 
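The createExternalTable and jdbc readers above, sketched with a placeholder JDBC URL, table name, and path:

{{{
import java.util.Properties

// Registers an external table backed by the default data source and returns it as a DataFrame.
val ext = sqlContext.createExternalTable("people_ext", "path/to/people.parquet")

// Whole-table JDBC read (@since 1.3.0) and the Properties overload (@since 1.4.0).
val all = sqlContext.jdbc("jdbc:postgresql:testdb", "people")
val props = new Properties()
props.setProperty("user", "test")
val withProps = sqlContext.jdbc("jdbc:postgresql:testdb", "people", props)

// Partitioned read: "age" is split into 4 even ranges between 0 and 100.
val byAge = sqlContext.jdbc("jdbc:postgresql:testdb", "people", "age", 0L, 100L, 4)
}}}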
* * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc( @@ -1075,6 +1148,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param tableName the name of the table to be unregistered. * * @group basic + * @since 1.3.0 */ def dropTempTable(tableName: String): Unit = { cacheManager.tryUncacheQuery(table(tableName)) @@ -1086,6 +1160,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * used for SQL parsing can be configured with 'spark.sql.dialect'. * * @group basic + * @since 1.3.0 */ def sql(sqlText: String): DataFrame = { DataFrame(this, parseSql(sqlText)) @@ -1095,6 +1170,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the specified table as a [[DataFrame]]. * * @group ddl_ops + * @since 1.3.0 */ def table(tableName: String): DataFrame = DataFrame(this, catalog.lookupRelation(Seq(tableName))) @@ -1105,6 +1181,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(): DataFrame = { DataFrame(this, ShowTablesCommand(None)) @@ -1116,6 +1193,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(databaseName: String): DataFrame = { DataFrame(this, ShowTablesCommand(Some(databaseName))) @@ -1125,6 +1203,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the current database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(): Array[String] = { catalog.getTables(None).map { @@ -1136,6 +1215,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the given database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(databaseName: String): Array[String] = { catalog.getTables(Some(databaseName)).map { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala index 5921eaf5e6..6b1ae81972 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql - import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index b97aaf7352..dc3389c41b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -33,6 +33,8 @@ import org.apache.spark.sql.types.DataType /** * Functions for registering user-defined functions. Use [[SQLContext.udf]] to access this. + * + * @since 1.3.0 */ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { @@ -87,6 +89,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of ${x} arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -104,6 +107,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { println(s""" |/** | * Register a user-defined function with ${i} arguments. 
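A sketch of the SQL and catalog methods tagged above (it reuses the hypothetical `people` temp table from the earlier sketches):

{{{
people.registerTempTable("people")

val adults = sqlContext.sql("SELECT name FROM people WHERE age >= 18")
val sameTable = sqlContext.table("people")

sqlContext.tables().show()                       // table names plus an isTemporary flag
val names: Array[String] = sqlContext.tableNames()

sqlContext.dropTempTable("people")
}}}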
+ | * @since 1.3.0 | */ |def register(name: String, f: UDF$i[$extTypeArgs, _], returnType: DataType) = { | functionRegistry.registerFunction( @@ -116,6 +120,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 0 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -127,6 +132,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 1 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -138,6 +144,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 2 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag](name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -149,6 +156,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 3 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -160,6 +168,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 4 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -171,6 +180,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 5 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](name: String, func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -182,6 +192,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 6 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -193,6 +204,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 7 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
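Scala-closure UDF registration, as documented above; register() also hands back a UserDefinedFunction usable in DataFrame expressions (the function and column names here are illustrative):

{{{
val strLen = sqlContext.udf.register("strLen", (s: String) => s.length)
sqlContext.udf.register("add", (a: Int, b: Int) => a + b)

df.select(strLen(df("name")))
sqlContext.sql("SELECT strLen(name), add(age, 1) FROM people")
}}}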
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](name: String, func: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -204,6 +216,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 8 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](name: String, func: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -215,6 +228,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 9 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](name: String, func: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -226,6 +240,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 10 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](name: String, func: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -237,6 +252,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 11 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag](name: String, func: Function11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -248,6 +264,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 12 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag](name: String, func: Function12[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -259,6 +276,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 13 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag](name: String, func: Function13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -270,6 +288,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 14 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag](name: String, func: Function14[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -281,6 +300,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 15 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag](name: String, func: Function15[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -292,6 +312,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 16 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag](name: String, func: Function16[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -303,6 +324,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 17 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag](name: String, func: Function17[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -314,6 +336,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 18 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag](name: String, func: Function18[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -325,6 +348,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 19 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag](name: String, func: Function19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -336,6 +360,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 20 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag](name: String, func: Function20[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -347,6 +372,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 21 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag](name: String, func: Function21[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -358,6 +384,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 22 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag, A22: TypeTag](name: String, func: Function22[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -371,6 +398,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 1 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -380,6 +408,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 2 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -389,6 +418,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 3 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -398,6 +428,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 4 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -407,6 +438,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 5 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -416,6 +448,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 6 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -425,6 +458,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 7 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -434,6 +468,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 8 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -443,6 +478,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 9 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -452,6 +488,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 10 arguments. 
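The UDF1..UDF22 overloads above exist for Java callers, but the same call can be sketched from Scala; the function name and types are made up for illustration:

{{{
import org.apache.spark.sql.api.java.UDF1
import org.apache.spark.sql.types.IntegerType

sqlContext.udf.register("plusOne", new UDF1[Int, Int] {
  override def call(x: Int): Int = x + 1
}, IntegerType)
}}}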
+ * @since 1.3.0 */ def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -461,6 +498,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 11 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -470,6 +508,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 12 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -479,6 +518,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 13 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -488,6 +528,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 14 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -497,6 +538,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 15 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -506,6 +548,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 16 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -515,6 +558,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 17 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -524,6 +568,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 18 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -533,6 +578,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 19 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -542,6 +588,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 20 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -551,6 +598,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 21 arguments. 
+ * @since 1.3.0 */ def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -560,6 +608,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 22 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala index 295db539ad..505ab1301e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import java.util.{List => JList, Map => JMap} import org.apache.spark.Accumulator +import org.apache.spark.annotation.Experimental import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.expressions.ScalaUdf @@ -36,7 +37,10 @@ import org.apache.spark.sql.types.DataType * // Projects a column that adds a prediction column based on the score column. * df.select( predict(df("score")) ) * }}} + * + * @since 1.3.0 */ +@Experimental case class UserDefinedFunction protected[sql] (f: AnyRef, dataType: DataType) { def apply(exprs: Column*): Column = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index fae4bd0fd2..215787e40b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -38,6 +38,7 @@ import org.apache.spark.util.Utils * @groupname normal_funcs Non-aggregate functions * @groupname math_funcs Math functions * @groupname Ungrouped Support functions for DataFrames. + * @since 1.3.0 */ @Experimental // scalastyle:off @@ -50,6 +51,7 @@ object functions { * Returns a [[Column]] based on the given column name. * * @group normal_funcs + * @since 1.3.0 */ def col(colName: String): Column = Column(colName) @@ -57,6 +59,7 @@ object functions { * Returns a [[Column]] based on the given column name. Alias of [[col]]. * * @group normal_funcs + * @since 1.3.0 */ def column(colName: String): Column = Column(colName) @@ -68,6 +71,7 @@ object functions { * Otherwise, a new [[Column]] is created to represent the literal value. * * @group normal_funcs + * @since 1.3.0 */ def lit(literal: Any): Column = { literal match { @@ -92,6 +96,7 @@ object functions { * }}} * * @group sort_funcs + * @since 1.3.0 */ def asc(columnName: String): Column = Column(columnName).asc @@ -103,6 +108,7 @@ object functions { * }}} * * @group sort_funcs + * @since 1.3.0 */ def desc(columnName: String): Column = Column(columnName).desc @@ -114,6 +120,7 @@ object functions { * Aggregate function: returns the sum of all values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sum(e: Column): Column = Sum(e.expr) @@ -121,6 +128,7 @@ object functions { * Aggregate function: returns the sum of all values in the given column. * * @group agg_funcs + * @since 1.3.0 */ def sum(columnName: String): Column = sum(Column(columnName)) @@ -128,6 +136,7 @@ object functions { * Aggregate function: returns the sum of distinct values in the expression. 
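A sketch tying together the now-@Experimental UserDefinedFunction and the basic column helpers tagged above (the column names are assumptions):

{{{
import org.apache.spark.sql.functions._

// An unregistered UDF, applied directly to a column as in the case-class scaladoc.
val predict = udf((score: Double) => if (score > 0.5) "yes" else "no")
df.select(predict(df("score")))

// col/lit/asc and the basic aggregates.
df.select(col("name"), lit(1).as("one")).orderBy(asc("name"))
df.groupBy("department").agg(sum("expense"), count("name"))
}}}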
* * @group agg_funcs + * @since 1.3.0 */ def sumDistinct(e: Column): Column = SumDistinct(e.expr) @@ -135,6 +144,7 @@ object functions { * Aggregate function: returns the sum of distinct values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName)) @@ -142,6 +152,7 @@ object functions { * Aggregate function: returns the number of items in a group. * * @group agg_funcs + * @since 1.3.0 */ def count(e: Column): Column = e.expr match { // Turn count(*) into count(1) @@ -153,6 +164,7 @@ object functions { * Aggregate function: returns the number of items in a group. * * @group agg_funcs + * @since 1.3.0 */ def count(columnName: String): Column = count(Column(columnName)) @@ -160,6 +172,7 @@ object functions { * Aggregate function: returns the number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ @scala.annotation.varargs def countDistinct(expr: Column, exprs: Column*): Column = @@ -169,6 +182,7 @@ object functions { * Aggregate function: returns the number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ @scala.annotation.varargs def countDistinct(columnName: String, columnNames: String*): Column = @@ -178,6 +192,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr) @@ -185,6 +200,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName)) @@ -192,6 +208,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd) @@ -199,6 +216,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(columnName: String, rsd: Double): Column = { approxCountDistinct(Column(columnName), rsd) @@ -208,6 +226,7 @@ object functions { * Aggregate function: returns the average of the values in a group. * * @group agg_funcs + * @since 1.3.0 */ def avg(e: Column): Column = Average(e.expr) @@ -215,6 +234,7 @@ object functions { * Aggregate function: returns the average of the values in a group. * * @group agg_funcs + * @since 1.3.0 */ def avg(columnName: String): Column = avg(Column(columnName)) @@ -222,6 +242,7 @@ object functions { * Aggregate function: returns the first value in a group. * * @group agg_funcs + * @since 1.3.0 */ def first(e: Column): Column = First(e.expr) @@ -229,6 +250,7 @@ object functions { * Aggregate function: returns the first value of a column in a group. * * @group agg_funcs + * @since 1.3.0 */ def first(columnName: String): Column = first(Column(columnName)) @@ -236,6 +258,7 @@ object functions { * Aggregate function: returns the last value in a group. * * @group agg_funcs + * @since 1.3.0 */ def last(e: Column): Column = Last(e.expr) @@ -243,6 +266,7 @@ object functions { * Aggregate function: returns the last value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def last(columnName: String): Column = last(Column(columnName)) @@ -251,6 +275,7 @@ object functions { * Alias for avg. 
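The aggregate functions above, in one sketch (hypothetical columns again):

{{{
import org.apache.spark.sql.functions._

df.groupBy("department").agg(
  countDistinct("name"),
  approxCountDistinct("name", 0.05),   // rsd-controlled approximation
  sumDistinct("expense"),
  first("name"),
  last("name"),
  mean("age"))                         // 1.4.0 alias for avg
}}}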
* * @group agg_funcs + * @since 1.4.0 */ def mean(e: Column): Column = avg(e) @@ -259,6 +284,7 @@ object functions { * Alias for avg. * * @group agg_funcs + * @since 1.4.0 */ def mean(columnName: String): Column = avg(columnName) @@ -266,6 +292,7 @@ object functions { * Aggregate function: returns the minimum value of the expression in a group. * * @group agg_funcs + * @since 1.3.0 */ def min(e: Column): Column = Min(e.expr) @@ -273,6 +300,7 @@ object functions { * Aggregate function: returns the minimum value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def min(columnName: String): Column = min(Column(columnName)) @@ -280,6 +308,7 @@ object functions { * Aggregate function: returns the maximum value of the expression in a group. * * @group agg_funcs + * @since 1.3.0 */ def max(e: Column): Column = Max(e.expr) @@ -287,6 +316,7 @@ object functions { * Aggregate function: returns the maximum value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def max(columnName: String): Column = max(Column(columnName)) @@ -298,6 +328,7 @@ object functions { * Computes the absolute value. * * @group normal_funcs + * @since 1.3.0 */ def abs(e: Column): Column = Abs(e.expr) @@ -305,6 +336,7 @@ object functions { * Creates a new array column. The input columns must all have the same data type. * * @group normal_funcs + * @since 1.4.0 */ @scala.annotation.varargs def array(cols: Column*): Column = CreateArray(cols.map(_.expr)) @@ -313,6 +345,7 @@ object functions { * Creates a new array column. The input columns must all have the same data type. * * @group normal_funcs + * @since 1.4.0 */ def array(colName: String, colNames: String*): Column = { array((colName +: colNames).map(col) : _*) @@ -325,6 +358,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ @scala.annotation.varargs def coalesce(e: Column*): Column = Coalesce(e.map(_.expr)) @@ -333,6 +367,7 @@ object functions { * Converts a string exprsesion to lower case. * * @group normal_funcs + * @since 1.3.0 */ def lower(e: Column): Column = Lower(e.expr) @@ -349,6 +384,7 @@ object functions { * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. * * @group normal_funcs + * @since 1.4.0 */ def monotonicallyIncreasingId(): Column = execution.expressions.MonotonicallyIncreasingID() @@ -364,6 +400,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ def negate(e: Column): Column = -e @@ -378,6 +415,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ def not(e: Column): Column = !e @@ -385,6 +423,7 @@ object functions { * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * * @group normal_funcs + * @since 1.4.0 */ def rand(seed: Long): Column = Rand(seed) @@ -392,6 +431,7 @@ object functions { * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * * @group normal_funcs + * @since 1.4.0 */ def rand(): Column = rand(Utils.random.nextLong) @@ -399,6 +439,7 @@ object functions { * Generate a column with i.i.d. samples from the standard normal distribution. * * @group normal_funcs + * @since 1.4.0 */ def randn(seed: Long): Column = Randn(seed) @@ -406,6 +447,7 @@ object functions { * Generate a column with i.i.d. samples from the standard normal distribution. * * @group normal_funcs + * @since 1.4.0 */ def randn(): Column = randn(Utils.random.nextLong) @@ -415,6 +457,7 @@ object functions { * Note that this is indeterministic because it depends on data partitioning and task scheduling. 
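The non-aggregate helpers above, sketched against assumed columns "name", "delta", "a", "b" and "nickname":

{{{
import org.apache.spark.sql.functions._

df.select(
  lower(col("name")),
  abs(col("delta")),
  array("a", "b"),                      // input columns must share a data type
  coalesce(col("nickname"), col("name")),
  monotonicallyIncreasingId(),          // unique and monotonic, but not consecutive
  rand(42L),                            // U[0.0, 1.0] with a fixed seed
  randn())                              // standard normal
}}}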
* * @group normal_funcs + * @since 1.4.0 */ def sparkPartitionId(): Column = execution.expressions.SparkPartitionID @@ -422,6 +465,7 @@ object functions { * Computes the square root of the specified float value. * * @group normal_funcs + * @since 1.3.0 */ def sqrt(e: Column): Column = Sqrt(e.expr) @@ -430,6 +474,7 @@ object functions { * a derived column expression that is named (i.e. aliased). * * @group normal_funcs + * @since 1.4.0 */ @scala.annotation.varargs def struct(cols: Column*): Column = { @@ -442,6 +487,7 @@ object functions { * Creates a new struct column that composes multiple input columns. * * @group normal_funcs + * @since 1.4.0 */ def struct(colName: String, colNames: String*): Column = { struct((colName +: colNames).map(col) : _*) @@ -451,14 +497,15 @@ object functions { * Converts a string expression to upper case. * * @group normal_funcs + * @since 1.3.0 */ def upper(e: Column): Column = Upper(e.expr) - /** * Computes bitwise NOT. * * @group normal_funcs + * @since 1.4.0 */ def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr) @@ -471,6 +518,7 @@ object functions { * 0.0 through pi. * * @group math_funcs + * @since 1.4.0 */ def acos(e: Column): Column = Acos(e.expr) @@ -479,6 +527,7 @@ object functions { * 0.0 through pi. * * @group math_funcs + * @since 1.4.0 */ def acos(columnName: String): Column = acos(Column(columnName)) @@ -487,6 +536,7 @@ object functions { * -pi/2 through pi/2. * * @group math_funcs + * @since 1.4.0 */ def asin(e: Column): Column = Asin(e.expr) @@ -495,6 +545,7 @@ object functions { * -pi/2 through pi/2. * * @group math_funcs + * @since 1.4.0 */ def asin(columnName: String): Column = asin(Column(columnName)) @@ -502,6 +553,7 @@ object functions { * Computes the tangent inverse of the given value. * * @group math_funcs + * @since 1.4.0 */ def atan(e: Column): Column = Atan(e.expr) @@ -509,6 +561,7 @@ object functions { * Computes the tangent inverse of the given column. * * @group math_funcs + * @since 1.4.0 */ def atan(columnName: String): Column = atan(Column(columnName)) @@ -517,6 +570,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Column, r: Column): Column = Atan2(l.expr, r.expr) @@ -525,6 +579,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Column, rightName: String): Column = atan2(l, Column(rightName)) @@ -533,6 +588,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(leftName: String, r: Column): Column = atan2(Column(leftName), r) @@ -541,6 +597,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(leftName: String, rightName: String): Column = atan2(Column(leftName), Column(rightName)) @@ -550,6 +607,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Column, r: Double): Column = atan2(l, lit(r).expr) @@ -558,6 +616,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(leftName: String, r: Double): Column = atan2(Column(leftName), r) @@ -566,6 +625,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Double, r: Column): Column = atan2(lit(l).expr, r) @@ -574,6 +634,7 @@ object functions { * polar coordinates (r, theta). 
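And the remaining non-aggregate plus inverse-trig helpers above (column names are assumptions):

{{{
import org.apache.spark.sql.functions._

df.select(
  sqrt(col("area")),
  struct(col("name"), col("age")).as("person"),
  upper(col("name")),
  bitwiseNOT(col("flags")),
  atan2(col("y"), col("x")))            // theta of the point (x, y) in polar coordinates
}}}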
* * @group math_funcs + * @since 1.4.0 */ def atan2(l: Double, rightName: String): Column = atan2(l, Column(rightName)) @@ -581,6 +642,7 @@ object functions { * Computes the cube-root of the given value. * * @group math_funcs + * @since 1.4.0 */ def cbrt(e: Column): Column = Cbrt(e.expr) @@ -588,6 +650,7 @@ object functions { * Computes the cube-root of the given column. * * @group math_funcs + * @since 1.4.0 */ def cbrt(columnName: String): Column = cbrt(Column(columnName)) @@ -595,6 +658,7 @@ object functions { * Computes the ceiling of the given value. * * @group math_funcs + * @since 1.4.0 */ def ceil(e: Column): Column = Ceil(e.expr) @@ -602,6 +666,7 @@ object functions { * Computes the ceiling of the given column. * * @group math_funcs + * @since 1.4.0 */ def ceil(columnName: String): Column = ceil(Column(columnName)) @@ -609,6 +674,7 @@ object functions { * Computes the cosine of the given value. * * @group math_funcs + * @since 1.4.0 */ def cos(e: Column): Column = Cos(e.expr) @@ -616,6 +682,7 @@ object functions { * Computes the cosine of the given column. * * @group math_funcs + * @since 1.4.0 */ def cos(columnName: String): Column = cos(Column(columnName)) @@ -623,6 +690,7 @@ object functions { * Computes the hyperbolic cosine of the given value. * * @group math_funcs + * @since 1.4.0 */ def cosh(e: Column): Column = Cosh(e.expr) @@ -630,6 +698,7 @@ object functions { * Computes the hyperbolic cosine of the given column. * * @group math_funcs + * @since 1.4.0 */ def cosh(columnName: String): Column = cosh(Column(columnName)) @@ -637,6 +706,7 @@ object functions { * Computes the exponential of the given value. * * @group math_funcs + * @since 1.4.0 */ def exp(e: Column): Column = Exp(e.expr) @@ -644,6 +714,7 @@ object functions { * Computes the exponential of the given column. * * @group math_funcs + * @since 1.4.0 */ def exp(columnName: String): Column = exp(Column(columnName)) @@ -651,6 +722,7 @@ object functions { * Computes the exponential of the given value minus one. * * @group math_funcs + * @since 1.4.0 */ def expm1(e: Column): Column = Expm1(e.expr) @@ -658,6 +730,7 @@ object functions { * Computes the exponential of the given column. * * @group math_funcs + * @since 1.4.0 */ def expm1(columnName: String): Column = expm1(Column(columnName)) @@ -665,6 +738,7 @@ object functions { * Computes the floor of the given value. * * @group math_funcs + * @since 1.4.0 */ def floor(e: Column): Column = Floor(e.expr) @@ -672,6 +746,7 @@ object functions { * Computes the floor of the given column. * * @group math_funcs + * @since 1.4.0 */ def floor(columnName: String): Column = floor(Column(columnName)) @@ -679,6 +754,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Column, r: Column): Column = Hypot(l.expr, r.expr) @@ -686,6 +762,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Column, rightName: String): Column = hypot(l, Column(rightName)) @@ -693,6 +770,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(leftName: String, r: Column): Column = hypot(Column(leftName), r) @@ -700,6 +778,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. 
* * @group math_funcs + * @since 1.4.0 */ def hypot(leftName: String, rightName: String): Column = hypot(Column(leftName), Column(rightName)) @@ -708,6 +787,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Column, r: Double): Column = hypot(l, lit(r).expr) @@ -715,6 +795,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(leftName: String, r: Double): Column = hypot(Column(leftName), r) @@ -722,6 +803,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Double, r: Column): Column = hypot(lit(l).expr, r) @@ -729,6 +811,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName)) @@ -736,6 +819,7 @@ object functions { * Computes the natural logarithm of the given value. * * @group math_funcs + * @since 1.4.0 */ def log(e: Column): Column = Log(e.expr) @@ -743,6 +827,7 @@ object functions { * Computes the natural logarithm of the given column. * * @group math_funcs + * @since 1.4.0 */ def log(columnName: String): Column = log(Column(columnName)) @@ -750,6 +835,7 @@ object functions { * Computes the logarithm of the given value in Base 10. * * @group math_funcs + * @since 1.4.0 */ def log10(e: Column): Column = Log10(e.expr) @@ -757,6 +843,7 @@ object functions { * Computes the logarithm of the given value in Base 10. * * @group math_funcs + * @since 1.4.0 */ def log10(columnName: String): Column = log10(Column(columnName)) @@ -764,6 +851,7 @@ object functions { * Computes the natural logarithm of the given value plus one. * * @group math_funcs + * @since 1.4.0 */ def log1p(e: Column): Column = Log1p(e.expr) @@ -771,6 +859,7 @@ object functions { * Computes the natural logarithm of the given column plus one. * * @group math_funcs + * @since 1.4.0 */ def log1p(columnName: String): Column = log1p(Column(columnName)) @@ -778,6 +867,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Column, r: Column): Column = Pow(l.expr, r.expr) @@ -785,6 +875,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Column, rightName: String): Column = pow(l, Column(rightName)) @@ -792,6 +883,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(leftName: String, r: Column): Column = pow(Column(leftName), r) @@ -799,6 +891,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(leftName: String, rightName: String): Column = pow(Column(leftName), Column(rightName)) @@ -806,6 +899,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Column, r: Double): Column = pow(l, lit(r).expr) @@ -813,6 +907,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. 
* * @group math_funcs + * @since 1.4.0 */ def pow(leftName: String, r: Double): Column = pow(Column(leftName), r) @@ -820,6 +915,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Double, r: Column): Column = pow(lit(l).expr, r) @@ -827,6 +923,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Double, rightName: String): Column = pow(l, Column(rightName)) @@ -835,6 +932,7 @@ object functions { * is equal to a mathematical integer. * * @group math_funcs + * @since 1.4.0 */ def rint(e: Column): Column = Rint(e.expr) @@ -843,6 +941,7 @@ object functions { * is equal to a mathematical integer. * * @group math_funcs + * @since 1.4.0 */ def rint(columnName: String): Column = rint(Column(columnName)) @@ -850,6 +949,7 @@ object functions { * Computes the signum of the given value. * * @group math_funcs + * @since 1.4.0 */ def signum(e: Column): Column = Signum(e.expr) @@ -857,6 +957,7 @@ object functions { * Computes the signum of the given column. * * @group math_funcs + * @since 1.4.0 */ def signum(columnName: String): Column = signum(Column(columnName)) @@ -864,6 +965,7 @@ object functions { * Computes the sine of the given value. * * @group math_funcs + * @since 1.4.0 */ def sin(e: Column): Column = Sin(e.expr) @@ -871,6 +973,7 @@ object functions { * Computes the sine of the given column. * * @group math_funcs + * @since 1.4.0 */ def sin(columnName: String): Column = sin(Column(columnName)) @@ -878,6 +981,7 @@ object functions { * Computes the hyperbolic sine of the given value. * * @group math_funcs + * @since 1.4.0 */ def sinh(e: Column): Column = Sinh(e.expr) @@ -885,6 +989,7 @@ object functions { * Computes the hyperbolic sine of the given column. * * @group math_funcs + * @since 1.4.0 */ def sinh(columnName: String): Column = sinh(Column(columnName)) @@ -892,6 +997,7 @@ object functions { * Computes the tangent of the given value. * * @group math_funcs + * @since 1.4.0 */ def tan(e: Column): Column = Tan(e.expr) @@ -899,6 +1005,7 @@ object functions { * Computes the tangent of the given column. * * @group math_funcs + * @since 1.4.0 */ def tan(columnName: String): Column = tan(Column(columnName)) @@ -906,6 +1013,7 @@ object functions { * Computes the hyperbolic tangent of the given value. * * @group math_funcs + * @since 1.4.0 */ def tanh(e: Column): Column = Tanh(e.expr) @@ -913,6 +1021,7 @@ object functions { * Computes the hyperbolic tangent of the given column. * * @group math_funcs + * @since 1.4.0 */ def tanh(columnName: String): Column = tanh(Column(columnName)) @@ -920,6 +1029,7 @@ object functions { * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. * * @group math_funcs + * @since 1.4.0 */ def toDegrees(e: Column): Column = ToDegrees(e.expr) @@ -927,6 +1037,7 @@ object functions { * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. * * @group math_funcs + * @since 1.4.0 */ def toDegrees(columnName: String): Column = toDegrees(Column(columnName)) @@ -934,6 +1045,7 @@ object functions { * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. 
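To make the math functions documented above concrete, here is a small hedged sketch; `points` is a hypothetical DataFrame with double columns `x` and `y`:

{{{
  import org.apache.spark.sql.functions._

  points.select(
    hypot(col("x"), col("y")).as("r"),                     // sqrt(x^2 + y^2) without overflow
    toDegrees(atan2(col("y"), col("x"))).as("theta_deg"),  // polar angle, converted to degrees
    pow(col("x"), 3.0).as("x_cubed"),                      // first argument raised to a constant power
    log1p(col("y")).as("log_y_plus_1"))                    // natural log of y + 1
}}}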
* * @group math_funcs + * @since 1.4.0 */ def toRadians(e: Column): Column = ToRadians(e.expr) @@ -941,6 +1053,7 @@ object functions { * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. * * @group math_funcs + * @since 1.4.0 */ def toRadians(columnName: String): Column = toRadians(Column(columnName)) @@ -960,6 +1073,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -976,6 +1090,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = { ScalaUdf(f, returnType, Seq($argsInUdf)) @@ -988,6 +1103,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -998,6 +1114,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag](f: Function1[A1, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1008,6 +1125,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: Function2[A1, A2, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1018,6 +1136,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](f: Function3[A1, A2, A3, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1028,6 +1147,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](f: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1038,6 +1158,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](f: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1048,6 +1169,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](f: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1058,6 +1180,7 @@ object functions { * The data types are automatically inferred based on the function's signature. 
* * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](f: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1068,6 +1191,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](f: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1078,6 +1202,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](f: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1088,6 +1213,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](f: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1100,6 +1226,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function0[_], returnType: DataType): Column = { ScalaUdf(f, returnType, Seq()) @@ -1110,6 +1237,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr)) @@ -1120,6 +1248,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr)) @@ -1130,6 +1259,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr)) @@ -1140,6 +1270,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr)) @@ -1150,6 +1281,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr)) @@ -1160,6 +1292,7 @@ object functions { * you to specify the return data type. 
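A hedged sketch of the two styles above: `udf` infers the return type from the closure's signature, while the typed `callUDF` overloads take an explicit return `DataType`. The DataFrame `df` and its column `x` are hypothetical:

{{{
  import org.apache.spark.sql.functions._
  import org.apache.spark.sql.types.DoubleType

  // Return type inferred from the function signature.
  val squared = udf((x: Double) => x * x)
  df.select(squared(col("x")).as("x_squared"))

  // Return type supplied explicitly; arguments are passed as columns.
  df.select(callUDF((x: Double) => x + 1.0, DoubleType, col("x")).as("x_plus_one"))
}}}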
* * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr)) @@ -1170,6 +1303,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr)) @@ -1180,6 +1314,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr)) @@ -1190,6 +1325,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr)) @@ -1200,6 +1336,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr)) @@ -1220,6 +1357,7 @@ object functions { * }}} * * @group udf_funcs + * @since 1.4.0 */ def callUdf(udfName: String, cols: Column*): Column = { UnresolvedFunction(udfName, cols.map(_.expr)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala index 791046e007..24e86ca415 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -19,83 +19,113 @@ package org.apache.spark.sql.sources /** * A filter predicate for data sources. + * + * @since 1.3.0 */ abstract class Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * equal to `value`. + * + * @since 1.3.0 */ case class EqualTo(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * greater than `value`. + * + * @since 1.3.0 */ case class GreaterThan(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * greater than or equal to `value`. + * + * @since 1.3.0 */ case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * less than `value`. 
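For `callUdf` above, which resolves a UDF by name, a hedged usage sketch (the UDF name "strLen", `sqlContext`, and the column "name" are illustrative assumptions):

{{{
  // Register once, then call by name from the DataFrame DSL.
  sqlContext.udf.register("strLen", (s: String) => s.length)
  df.select(callUdf("strLen", col("name")).as("name_length"))
}}}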
+ * + * @since 1.3.0 */ case class LessThan(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * less than or equal to `value`. + * + * @since 1.3.0 */ case class LessThanOrEqual(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to one of the values in the array. + * + * @since 1.3.0 */ case class In(attribute: String, values: Array[Any]) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to null. + * + * @since 1.3.0 */ case class IsNull(attribute: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a non-null value. + * + * @since 1.3.0 */ case class IsNotNull(attribute: String) extends Filter /** * A filter that evaluates to `true` iff both `left` and `right` evaluate to `true`. + * + * @since 1.3.0 */ case class And(left: Filter, right: Filter) extends Filter /** * A filter that evaluates to `true` iff at least one of `left` or `right` evaluates to `true`. + * + * @since 1.3.0 */ case class Or(left: Filter, right: Filter) extends Filter /** * A filter that evaluates to `true` iff `child` evaluates to `false`. + * + * @since 1.3.0 */ case class Not(child: Filter) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that starts with `value`. + * + * @since 1.3.1 */ case class StringStartsWith(attribute: String, value: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that ends with `value`. + * + * @since 1.3.1 */ case class StringEndsWith(attribute: String, value: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that contains the string `value`. + * + * @since 1.3.1 */ case class StringContains(attribute: String, value: String) extends Filter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 5e010d2112..6f315305c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -43,6 +43,8 @@ import org.apache.spark.sql.types.{StructField, StructType} * data source 'org.apache.spark.sql.json.DefaultSource' * * A new instance of this class will be instantiated each time a DDL call is made. + * + * @since 1.3.0 */ @DeveloperApi trait RelationProvider { @@ -72,6 +74,8 @@ trait RelationProvider { * users need to provide a schema when using a SchemaRelationProvider. * A relation provider can inherit both [[RelationProvider]] and [[SchemaRelationProvider]] * if it can support both schema inference and user-specified schemas. + * + * @since 1.3.0 */ @DeveloperApi trait SchemaRelationProvider { @@ -106,6 +110,8 @@ trait SchemaRelationProvider { * using a SchemaRelationProvider. A relation provider can inherit both [[RelationProvider]], * and [[FSBasedRelationProvider]] if it can support schema inference, user-specified * schemas, and accessing partitioned relations.
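To show how the Filter predicates above are typically consumed, here is a hedged sketch of a helper a data source might use to turn pushed-down filters into a SQL-like predicate string; the helper, its quoting, and the fallback are assumptions, not part of this API:

{{{
  import org.apache.spark.sql.sources._

  // Unhandled filters fall back to "TRUE": Spark re-evaluates all pushed-down
  // filters after the scan, so pushdown here is only a (safe) optimization.
  def compileFilter(f: Filter): String = f match {
    case EqualTo(attr, v)         => s"$attr = '$v'"
    case GreaterThan(attr, v)     => s"$attr > '$v'"
    case LessThanOrEqual(attr, v) => s"$attr <= '$v'"
    case In(attr, vs)             => vs.map(v => s"'$v'").mkString(s"$attr IN (", ", ", ")")
    case IsNull(attr)             => s"$attr IS NULL"
    case And(l, r)                => s"(${compileFilter(l)}) AND (${compileFilter(r)})"
    case Or(l, r)                 => s"(${compileFilter(l)}) OR (${compileFilter(r)})"
    case Not(child)               => s"NOT (${compileFilter(child)})"
    case StringStartsWith(a, v)   => s"$a LIKE '$v%'"
    case StringEndsWith(a, v)     => s"$a LIKE '%$v'"
    case _                        => "TRUE"
  }
}}}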
+ * + * @since 1.4.0 */ trait FSBasedRelationProvider { /** @@ -121,6 +127,9 @@ trait FSBasedRelationProvider { parameters: Map[String, String]): FSBasedRelation } +/** + * @since 1.3.0 + */ @DeveloperApi trait CreatableRelationProvider { /** @@ -134,6 +143,8 @@ trait CreatableRelationProvider { * existing data is expected to be overwritten by the contents of the DataFrame. * ErrorIfExists mode means that when saving a DataFrame to a data source, * if data already exists, an exception is expected to be thrown. + * + * @since 1.3.0 */ def createRelation( sqlContext: SQLContext, @@ -152,6 +163,8 @@ trait CreatableRelationProvider { * BaseRelations must also define a equality function that only returns true when the two * instances will return the same data. This equality function is used when determining when * it is safe to substitute cached results for a given relation. + * + * @since 1.3.0 */ @DeveloperApi abstract class BaseRelation { @@ -167,6 +180,8 @@ abstract class BaseRelation { * * Note that it is always better to overestimate size than underestimate, because underestimation * could lead to execution plans that are suboptimal (i.e. broadcasting a very large table). + * + * @since 1.3.0 */ def sizeInBytes: Long = sqlContext.conf.defaultSizeInBytes @@ -177,6 +192,8 @@ abstract class BaseRelation { * * Note: The internal representation is not stable across releases and thus data sources outside * of Spark SQL should leave this as true. + * + * @since 1.4.0 */ def needConversion: Boolean = true } @@ -184,6 +201,8 @@ abstract class BaseRelation { /** * ::DeveloperApi:: * A BaseRelation that can produce all of its tuples as an RDD of Row objects. + * + * @since 1.3.0 */ @DeveloperApi trait TableScan { @@ -194,6 +213,8 @@ trait TableScan { * ::DeveloperApi:: * A BaseRelation that can eliminate unneeded columns before producing an RDD * containing all of its tuples as Row objects. + * + * @since 1.3.0 */ @DeveloperApi trait PrunedScan { @@ -211,6 +232,8 @@ trait PrunedScan { * The pushed down filters are currently purely an optimization as they will all be evaluated * again. This means it is safe to use them with methods that produce false positives such * as filtering partitions based on a bloom filter. + * + * @since 1.3.0 */ @DeveloperApi trait PrunedFilteredScan { @@ -232,6 +255,8 @@ trait PrunedFilteredScan { * 3. It assumes that fields of the data provided in the insert method are nullable. * If a data source needs to check the actual nullability of a field, it needs to do it in the * insert method. + * + * @since 1.3.0 */ @DeveloperApi trait InsertableRelation { @@ -245,6 +270,8 @@ trait InsertableRelation { * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. Unlike the other APIs this * interface is NOT designed to be binary compatible across releases and thus should only be used * for experimentation. + * + * @since 1.3.0 */ @Experimental trait CatalystScan { @@ -257,6 +284,8 @@ trait CatalystScan { * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. * An [[OutputWriter]] instance is created and initialized when a new output file is opened on * executor side. This instance is used to persist rows to this single output file. + * + * @since 1.4.0 */ @Experimental abstract class OutputWriter { @@ -270,6 +299,8 @@ abstract class OutputWriter { * @param dataSchema Schema of the rows to be written. Partition columns are not included in the * schema if the corresponding relation is partitioned. 
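The provider and scan traits above combine into a data source roughly as follows. This is a minimal hedged sketch: the class names, the "n" option, and the generated data are invented for illustration:

{{{
  import org.apache.spark.rdd.RDD
  import org.apache.spark.sql.{Row, SQLContext}
  import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, TableScan}
  import org.apache.spark.sql.types.{LongType, StructField, StructType}

  // Hypothetical provider that exposes the numbers 0 until `n` as a one-column relation.
  class DefaultSource extends RelationProvider {
    override def createRelation(
        sqlContext: SQLContext,
        parameters: Map[String, String]): BaseRelation = {
      val n = parameters.getOrElse("n", "10").toLong
      new RangeRelation(n)(sqlContext)
    }
  }

  class RangeRelation(n: Long)(@transient val sqlContext: SQLContext)
    extends BaseRelation with TableScan {

    override val schema: StructType =
      StructType(StructField("i", LongType, nullable = false) :: Nil)

    // TableScan: produce every tuple of the relation as an RDD of Row objects.
    override def buildScan(): RDD[Row] =
      sqlContext.sparkContext.parallelize(0L until n).map(Row(_))
  }
}}}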
* @param context The Hadoop MapReduce task context. + * + * @since 1.4.0 */ def init( path: String, @@ -279,12 +310,16 @@ abstract class OutputWriter { /** * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned * tables, dynamic partition columns are not included in rows to be written. + * + * @since 1.4.0 */ def write(row: Row): Unit /** * Closes the [[OutputWriter]]. Invoked on the executor side after all rows are persisted, before * the task output is committed. + * + * @since 1.4.0 */ def close(): Unit } @@ -310,6 +345,8 @@ abstract class OutputWriter { * directories of all partition directories. * @param maybePartitionSpec An [[FSBasedRelation]] can be created with an optional * [[PartitionSpec]], so that partition discovery can be skipped. + * + * @since 1.4.0 */ @Experimental abstract class FSBasedRelation private[sql]( @@ -323,6 +360,8 @@ abstract class FSBasedRelation private[sql]( * @param paths Base paths of this relation. For partitioned relations, it should be either root * directories of all partition directories. * @param partitionColumns Partition columns of this relation. + * + * @since 1.4.0 */ def this(paths: Array[String], partitionColumns: StructType) = this(paths, { @@ -335,6 +374,8 @@ abstract class FSBasedRelation private[sql]( * * @param paths Base paths of this relation. For partitioned relations, it should be root * directories of all partition directories. + * + * @since 1.4.0 */ def this(paths: Array[String]) = this(paths, None) @@ -356,6 +397,8 @@ abstract class FSBasedRelation private[sql]( /** * Partition columns. Note that they are always nullable. + * + * @since 1.4.0 */ def partitionColumns: StructType = partitionSpec.partitionColumns @@ -385,6 +428,8 @@ abstract class FSBasedRelation private[sql]( /** * Schema of this relation. It consists of columns appearing in [[dataSchema]] and all partition * columns not appearing in [[dataSchema]]. + * + * @since 1.4.0 */ override lazy val schema: StructType = { val dataSchemaColumnNames = dataSchema.map(_.name.toLowerCase).toSet @@ -396,6 +441,8 @@ abstract class FSBasedRelation private[sql]( /** * Specifies schema of actual data files. For partitioned relations, if one or more partitioned * columns are contained in the data files, they should also appear in `dataSchema`. + * + * @since 1.4.0 */ def dataSchema: StructType @@ -407,6 +454,8 @@ abstract class FSBasedRelation private[sql]( * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. + * + * @since 1.4.0 */ def buildScan(inputPaths: Array[String]): RDD[Row] = { throw new RuntimeException( @@ -422,6 +471,8 @@ abstract class FSBasedRelation private[sql]( * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. + * + * @since 1.4.0 */ def buildScan(requiredColumns: Array[String], inputPaths: Array[String]): RDD[Row] = { // Yeah, to workaround serialization... @@ -458,6 +509,8 @@ abstract class FSBasedRelation private[sql]( * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. 
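Under the OutputWriter contract above (`init`, `write`, `close`), a concrete writer might look roughly like the following. This is a hedged sketch: the `init` parameter list follows the @param docs shown here, and the CSV rendering and the way the output stream is opened are assumptions:

{{{
  import org.apache.hadoop.fs.Path
  import org.apache.hadoop.mapreduce.TaskAttemptContext
  import org.apache.spark.sql.Row
  import org.apache.spark.sql.sources.OutputWriter
  import org.apache.spark.sql.types.StructType

  // Hypothetical writer that renders each row as one comma-separated line.
  class SimpleCsvOutputWriter extends OutputWriter {
    private var writer: java.io.PrintWriter = _

    override def init(path: String, dataSchema: StructType, context: TaskAttemptContext): Unit = {
      val file = new Path(path)
      val fs = file.getFileSystem(context.getConfiguration)
      writer = new java.io.PrintWriter(fs.create(file, false))
    }

    override def write(row: Row): Unit = writer.println(row.toSeq.mkString(","))

    override def close(): Unit = writer.close()
  }
}}}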
+ * + * @since 1.4.0 */ def buildScan( requiredColumns: Array[String], @@ -473,12 +526,16 @@ abstract class FSBasedRelation private[sql]( * Note that the only side effect expected here is mutating `job` via its setters. In particular, * Spark SQL caches [[BaseRelation]] instances for performance, so mutating relation internal state * may cause unexpected behaviors. + * + * @since 1.4.0 */ def prepareForWrite(job: Job): Unit = () /** * This method is responsible for producing a new [[OutputWriter]] for each newly opened output * file on the executor side. + * + * @since 1.4.0 */ def outputWriterClass: Class[_ <: OutputWriter] } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 61e8c154e8..766c42d040 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -63,6 +63,8 @@ private[hive] class HiveQLDialect extends ParserDialect { /** * An instance of the Spark SQL execution engine that integrates with data stored in Hive. * Configuration for Hive is read from hive-site.xml on the classpath. + * + * @since 1.0.0 */ class HiveContext(sc: SparkContext) extends SQLContext(sc) { self => @@ -225,6 +227,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * Spark SQL or the external data source library it uses might cache certain metadata about a * table, such as the location of blocks. When those change outside of Spark SQL, users should * call this function to invalidate the cache. + * + * @since 1.3.0 */ def refreshTable(tableName: String): Unit = { // TODO: Database support... @@ -242,6 +246,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * * Right now, it only supports Hive tables and it only updates the size of a Hive table * in the Hive metastore. + * + * @since 1.2.0 */ @Experimental def analyze(tableName: String) { -- GitLab
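Finally, a brief hedged sketch of the two HiveContext helpers documented above; the existing SparkContext `sc` and the table name "logs" are assumptions:

{{{
  import org.apache.spark.sql.hive.HiveContext

  // assumes an existing SparkContext named sc
  val hiveContext = new HiveContext(sc)

  // After data backing the Hive table "logs" changes outside Spark SQL:
  hiveContext.refreshTable("logs")  // invalidate cached metadata such as block locations
  hiveContext.analyze("logs")       // recompute the table's size in the Hive metastore
}}}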