Skip to content
Snippets Groups Projects
Commit 682b2d93 authored by Matei Zaharia
Browse files

Added a test for when an RDD only partially fits in memory

parent dca496bb
No related branches found
No related tags found
No related merge requests found
......@@ -158,12 +158,28 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter
assert(data.count() === 1000)
}
// NOTE(review): the +/- diff markers were lost in this span. The first test(...) header below
// and the MEMORY_ONLY `val data` line appear to be the pre-change lines that the following
// header and the MEMORY_ONLY_SER `val data` line replace -- confirm against the repository.
test("compute without caching with low memory") {
// Counts the same persisted RDD three times while the storage memory fraction is set very low.
test("compute without caching when no partitions fit in memory") {
System.setProperty("spark.storage.memoryFraction", "0.0001")
sc = new SparkContext(clusterUrl, "test")
val data = sc.parallelize(1 to 4000000, 2).persist(StorageLevel.MEMORY_ONLY)
// data will be 4 million * 4 bytes = 16 MB in size, but our memoryFraction set the cache
// to only 50 KB (0.0001 of 512 MB), so no partitions should fit in memory
val data = sc.parallelize(1 to 4000000, 2).persist(StorageLevel.MEMORY_ONLY_SER)
assert(data.count() === 4000000)
assert(data.count() === 4000000)
assert(data.count() === 4000000)
// Remove the override set at the top of this test.
// NOTE(review): this line is not reached if one of the assertions above throws.
System.clearProperty("spark.storage.memoryFraction")
}
// Checks that a persisted RDD still yields the right count when the cache is too small to hold
// all of it. The memoryFraction override is a JVM-wide system property, so it is cleared in a
// `finally` block: a failing assertion must not leave the override set.
test("compute when only some partitions fit in memory") {
  System.setProperty("spark.storage.memoryFraction", "0.01")
  try {
    sc = new SparkContext(clusterUrl, "test")
    // data will be 4 million * 4 bytes = 16 MB in size, but our memoryFraction set the cache
    // to only 5 MB (0.01 of 512 MB), so not all of it will fit in memory; we use 20 partitions
    // to make sure that *some* of them do fit though
    val data = sc.parallelize(1 to 4000000, 20).persist(StorageLevel.MEMORY_ONLY_SER)
    // Count three times so the RDD is evaluated again after the first pass has tried to cache it.
    assert(data.count() === 4000000)
    assert(data.count() === 4000000)
    assert(data.count() === 4000000)
  } finally {
    // Always remove the override, even when an assertion or the context setup throws.
    System.clearProperty("spark.storage.memoryFraction")
  }
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment