Rename PythonRDD.getStorageLevel to getStorageLevelByName on the Scala side
(reflectively resolves a named StorageLevel getter, e.g. MEMORY_ONLY, on the
StorageLevel singleton) and update pyspark's StorageLevelReader to call the
renamed method, memoizing each resolved level in a per-instance dict so that
repeated attribute lookups (e.g. StorageLevel.DISK_ONLY) avoid a Py4J round
trip to the JVM.

NOTE(review): the added __getattr__ keeps a bare `except:` that prints
"Failed to find StorageLevel:" and then falls through, implicitly returning
None — callers receive None instead of an exception on a bad level name, and
the bare clause also swallows KeyboardInterrupt/SystemExit. Consider
narrowing the exception type and re-raising in a follow-up. (Observed in the
`+` lines of the python/pyspark/context.py hunk; the `print` statement form
matches the file's existing Python 2 era code.)

This text precedes the `diff --git` header and is ignored when the patch is
applied; the patch content below is unchanged.

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 6ca56b3af63f38054a27b0a2603a2c14e653b59d..c3b770a42cba0df544d29bcb88ec11465a019039 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -275,7 +275,7 @@ private[spark] object PythonRDD { * Returns the StorageLevel with the given string name. * Throws an exception if the name is not a valid StorageLevel. */ - def getStorageLevel(name: String) : StorageLevel = { + def getStorageLevelByName(name: String) : StorageLevel = { // In Scala, "val MEMORY_ONLY" produces a public getter by the same name. val storageLevelGetter = StorageLevel.getClass().getDeclaredMethod(name) return storageLevelGetter.invoke(StorageLevel).asInstanceOf[StorageLevel] diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 49f9b4610d4223445c0a5c57163ffffe323b9628..514d56e2003103ac7646753145bcbca8905e6ea8 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -281,16 +281,23 @@ class SparkContext(object): class StorageLevelReader: """ - Mimics the Scala StorageLevel by directing all attribute requests + Mimics the Scala StorageLevel by delegating all attribute requests (e.g., StorageLevel.DISK_ONLY) to the JVM for reflection. + Memoizes results to reduce JVM call/memory overheads. """ def __init__(self, sc): self.sc = sc + self.memoized = {} def __getattr__(self, name): + if name in self.memoized: + return self.memoized[name] + try: - return self.sc._jvm.PythonRDD.getStorageLevel(name) + storageLevel = self.sc._jvm.PythonRDD.getStorageLevelByName(name) + self.memoized[name] = storageLevel + return storageLevel except: print "Failed to find StorageLevel:", name