diff --git a/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/english.txt b/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/english.txt index d075cc0babc3e1da868404af437f268832d76003..d6094d774a5b8a67fe7272d7dc90fcd9f5af9691 100644 --- a/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/english.txt +++ b/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/english.txt @@ -125,29 +125,57 @@ just don should now -d -ll -m -o -re -ve -y -ain -aren -couldn -didn -doesn -hadn -hasn -haven -isn -ma -mightn -mustn -needn -shan -shouldn -wasn -weren -won -wouldn +i'll +you'll +he'll +she'll +we'll +they'll +i'd +you'd +he'd +she'd +we'd +they'd +i'm +you're +he's +she's +it's +we're +they're +i've +we've +you've +they've +isn't +aren't +wasn't +weren't +haven't +hasn't +hadn't +don't +doesn't +didn't +won't +wouldn't +shan't +shouldn't +mustn't +can't +couldn't +cannot +could +here's +how's +let's +ought +that's +there's +what's +when's +where's +who's +why's +would \ No newline at end of file diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala index 957cf58a68f853b07cac219ca9c59c7a9ff68225..5262b146b184e59ebe35fb0075f5791a4e139749 100755 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala @@ -45,7 +45,7 @@ class StopWordsRemoverSuite .setOutputCol("filtered") val dataSet = Seq( (Seq("test", "test"), Seq("test", "test")), - (Seq("a", "b", "c", "d"), Seq("b", "c")), + (Seq("a", "b", "c", "d"), Seq("b", "c", "d")), (Seq("a", "the", "an"), Seq()), (Seq("A", "The", "AN"), Seq()), (Seq(null), Seq(null)),