From a41ca203753f1c4f533dd78d41cf2f821b332990 Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@eecs.berkeley.edu>
Date: Mon, 4 Oct 2010 12:01:05 -0700
Subject: [PATCH] Added splitWords function in Utils

---
 src/scala/spark/Utils.scala | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/scala/spark/Utils.scala b/src/scala/spark/Utils.scala
index 52bcb89f00..27d73aefbd 100644
--- a/src/scala/spark/Utils.scala
+++ b/src/scala/spark/Utils.scala
@@ -2,7 +2,9 @@ package spark
 
 import java.io._
 
-private object Utils {
+import scala.collection.mutable.ArrayBuffer
+
+object Utils {
   def serialize[T](o: T): Array[Byte] = {
     val bos = new ByteArrayOutputStream
     val oos = new ObjectOutputStream(bos)
@@ -25,4 +27,27 @@ private object Utils {
     }
     return ois.readObject.asInstanceOf[T]
   }
+
+  def isAlpha(c: Char) = {
+    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+  }
+
+  def splitWords(s: String): Seq[String] = {
+    val buf = new ArrayBuffer[String]
+    var i = 0
+    while (i < s.length) {
+      var j = i
+      while (j < s.length && isAlpha(s.charAt(j))) {
+        j += 1
+      }
+      if (j > i) {
+        buf += s.substring(i, j);
+      }
+      i = j
+      while (i < s.length && !isAlpha(s.charAt(i))) {
+        i += 1
+      }
+    }
+    return buf
+  }
 }
-- 
GitLab