diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 78a167eef2e4e3218fab08d6b8c2cff05bf06cdb..e5140fcf1337ec78a9103e895b2c2d650f7be920 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1556,30 +1556,33 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * :: Experimental ::
    * (Scala-specific) Returns a new [[Dataset]] where each row has been expanded to zero or more
    * rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. The columns of
    * the input row are implicitly joined with each row that is output by the function.
    *
-   * The following example uses this function to count the number of books which contain
-   * a given word:
+   * Given that this is deprecated, as an alternative, you can explode columns either using
+   * `functions.explode()` or `flatMap()`. The following example uses these alternatives to count
+   * the number of books that contain a given word:
    *
    * {{{
    *   case class Book(title: String, words: String)
    *   val ds: Dataset[Book]
    *
-   *   case class Word(word: String)
-   *   val allWords = ds.explode('words) {
-   *     case Row(words: String) => words.split(" ").map(Word(_))
-   *   }
+   *   val allWords = ds.select('title, explode(split('words, " ")).as("word"))
    *
    *   val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title"))
    * }}}
    *
+   * Using `flatMap()` this can similarly be exploded as:
+   *
+   * {{{
+   *   ds.flatMap(_.words.split(" "))
+   * }}}
+   *
    * @group untypedrel
    * @since 2.0.0
    */
-  @Experimental
+  @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0")
   def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = {
     val elementSchema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
 
@@ -1596,19 +1599,27 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * :: Experimental ::
    * (Scala-specific) Returns a new [[Dataset]] where a single column has been expanded to zero
    * or more rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. All
    * columns of the input row are implicitly joined with each value that is output by the function.
    *
+   * Given that this is deprecated, as an alternative, you can explode columns either using
+   * `functions.explode()`:
+   *
+   * {{{
+   *   ds.select(explode(split('words, " ")).as("word"))
+   * }}}
+   *
+   * or `flatMap()`:
+   *
    * {{{
-   *   ds.explode("words", "word") {words: String => words.split(" ")}
+   *   ds.flatMap(_.words.split(" "))
    * }}}
    *
    * @group untypedrel
    * @since 2.0.0
    */
-  @Experimental
+  @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0")
   def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B])
     : DataFrame = {
     val dataType = ScalaReflection.schemaFor[B].dataType
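
For reference, a minimal, self-contained sketch (not part of this patch) of how a caller might migrate off the deprecated `Dataset.explode` overloads to the two alternatives named in the new Scaladoc. The `Book` case class and the query shapes come from the doc example above; the local `SparkSession`, the sample rows, and the `ExplodeMigrationExample` object name are illustrative assumptions.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{countDistinct, explode, split}

object ExplodeMigrationExample {
  // Case class from the Scaladoc example; the sample data below is made up.
  case class Book(title: String, words: String)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("explode-migration")
      .getOrCreate()
    import spark.implicits._

    val ds = Seq(
      Book("Dune", "spice sand worm"),
      Book("Foundation", "psychohistory empire spice")
    ).toDS()

    // Untyped alternative: functions.explode() over a split column,
    // mirroring the first example in the updated Scaladoc.
    val allWords = ds.select($"title", explode(split($"words", " ")).as("word"))
    val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title"))
    bookCountPerWord.show()

    // Typed alternative: flatMap() on the Dataset itself, yielding a Dataset[String].
    val flatWords = ds.flatMap(_.words.split(" "))
    flatWords.show()

    spark.stop()
  }
}
```

The `select` with `functions.explode()` keeps the query in the untyped, Catalyst-optimized path, while `flatMap()` is the typed equivalent when per-row Scala logic is needed.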