diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index 11399a7633638f8817c28c36bd353903d24f7667..08a93595a2e17e3130d7f195d969d8f1146c9387 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -173,7 +173,9 @@ object LDAExample { stopwordFile: String): (RDD[(Long, Vector)], Array[String], Long) = { // Get dataset of document texts - // One document per line in each text file. + // One document per line in each text file. If the input consists of many small files, + // this can result in a large number of small partitions, which can degrade performance. + // In this case, consider using coalesce() to create fewer, larger partitions. val textRDD: RDD[String] = sc.textFile(paths.mkString(",")) // Split text into words