From 635fb30f83a66cc56f5fecfed5bff77873bf49a6 Mon Sep 17 00:00:00 2001
From: Sameer Agarwal <sameer@databricks.com>
Date: Fri, 27 May 2016 11:11:31 -0700
Subject: [PATCH] [SPARK-15599][SQL][DOCS] API docs for `createDataset` functions in SparkSession

## What changes were proposed in this pull request?

Adds API docs and usage examples for the 3 `createDataset` calls in `SparkSession`

## How was this patch tested?

N/A

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13345 from sameeragarwal/dataset-doc.
---
 .../org/apache/spark/sql/SparkSession.scala  | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 5dabe0e83c..aa60048405 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -376,6 +376,40 @@ class SparkSession private(
     Dataset.ofRows(self, LogicalRelation(baseRelation))
   }

+  /* ------------------------------- *
+   |  Methods for creating DataSets  |
+   * ------------------------------- */
+
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+   * that is generally created automatically through implicits from a `SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * == Example ==
+   *
+   * {{{
+   *
+   *   import spark.implicits._
+   *   case class Person(name: String, age: Long)
+   *   val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
+   *   val ds = spark.createDataset(data)
+   *
+   *   ds.show()
+   *   // +-------+---+
+   *   // |   name|age|
+   *   // +-------+---+
+   *   // |Michael| 29|
+   *   // |   Andy| 30|
+   *   // | Justin| 19|
+   *   // +-------+---+
+   * }}}
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
     val enc = encoderFor[T]
     val attributes = enc.schema.toAttributes
@@ -384,6 +418,17 @@ class SparkSession private(
     Dataset[T](self, plan)
   }

+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from an RDD of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+   * that is generally created automatically through implicits from a `SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
     val enc = encoderFor[T]
     val attributes = enc.schema.toAttributes
@@ -392,6 +437,24 @@ class SparkSession private(
     Dataset[T](self, plan)
   }

+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+   * that is generally created automatically through implicits from a `SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * == Java Example ==
+   *
+   * {{{
+   *     List<String> data = Arrays.asList("hello", "world");
+   *     Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
+   * }}}
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
     createDataset(data.asScala)
   }
--
GitLab
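
Of the three overloads documented in this patch, `createDataset(RDD[T])` is the only one without an inline example in its scaladoc. The sketch below (not part of the patch) shows one plausible way to call it, assuming a local Spark 2.x application; the `Person` case class, the app name, and the `local[*]` master are illustrative assumptions, not anything defined by the patch.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical case class used only for this sketch; not part of the patch.
case class Person(name: String, age: Long)

object CreateDatasetFromRddSketch {
  def main(args: Array[String]): Unit = {
    // Assumes a local Spark 2.x build; app name and master are illustrative.
    val spark = SparkSession.builder()
      .appName("createDataset-from-RDD-sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._ // supplies the implicit Encoder[Person]

    // Build an RDD of case-class instances, then convert it to a typed Dataset.
    val rdd = spark.sparkContext.parallelize(
      Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19)))
    val ds = spark.createDataset(rdd)

    ds.show()
    spark.stop()
  }
}
```

As with the Seq overload documented above, the encoder is resolved implicitly via `import spark.implicits._`; it could instead be passed explicitly by obtaining one from `Encoders`.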