From a41ca203753f1c4f533dd78d41cf2f821b332990 Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@eecs.berkeley.edu>
Date: Mon, 4 Oct 2010 12:01:05 -0700
Subject: [PATCH] Added splitWords function in Utils

---
 src/scala/spark/Utils.scala | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/scala/spark/Utils.scala b/src/scala/spark/Utils.scala
index 52bcb89f00..27d73aefbd 100644
--- a/src/scala/spark/Utils.scala
+++ b/src/scala/spark/Utils.scala
@@ -2,7 +2,9 @@ package spark
 
 import java.io._
 
-private object Utils {
+import scala.collection.mutable.ArrayBuffer
+
+object Utils {
   def serialize[T](o: T): Array[Byte] = {
     val bos = new ByteArrayOutputStream
     val oos = new ObjectOutputStream(bos)
@@ -25,4 +27,27 @@ private object Utils {
     }
     return ois.readObject.asInstanceOf[T]
   }
+
+  def isAlpha(c: Char) = { // true only for the ASCII letters A-Z and a-z
+    ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
+  }
+
+  def splitWords(s: String): Seq[String] = {
+    // Collect every maximal run of isAlpha characters in s, left to right.
+    val words = new ArrayBuffer[String]
+    val len = s.length
+    var pos = 0
+    while (pos < len) {
+      if (isAlpha(s.charAt(pos))) {
+        val start = pos // first letter of a word; scan to one past its last
+        while (pos < len && isAlpha(s.charAt(pos))) {
+          pos += 1
+        }
+        words += s.substring(start, pos)
+      } else {
+        pos += 1 // separator character: skip it
+      }
+    }
+    words
+  }
 }
-- 
GitLab