diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java
index 52e3b62b79dd201dd1bc35587576b93f115dbbba..fc92446783381442cea98e6db612c8442c44d6fe 100644
--- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java
@@ -19,10 +19,13 @@ package org.apache.spark.examples.sql;
 // $example on:schema_merging$
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 // $example off:schema_merging$
 
 // $example on:basic_parquet_example$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 // $example on:schema_merging$
@@ -213,6 +216,19 @@ public class JavaSQLDataSourceExample {
     // +------+
     // |Justin|
     // +------+
+
+    // Alternatively, a DataFrame can be created for a JSON dataset represented by
+    // a JavaRDD<String> storing one JSON object per string.
+    List<String> jsonData = Arrays.asList(
+      "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
+    JavaRDD<String> anotherPeopleRDD = new JavaSparkContext(spark.sparkContext()).parallelize(jsonData);
+    Dataset<Row> anotherPeople = spark.read().json(anotherPeopleRDD);
+    anotherPeople.show();
+    // +---------------+----+
+    // |        address|name|
+    // +---------------+----+
+    // |[Columbus,Ohio]| Yin|
+    // +---------------+----+
     // $example off:json_dataset$
   }
 