Commit 96e04f4c authored by Xinghao

Fixed SVM and LR train functions to take Int instead of Double for Classification

parent 9398dced
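For context, a minimal sketch of how a caller would invoke train after this change, with integer class labels. The identifiers come from the diff below; the package imports, master URL, data path, and parameter values are assumptions for illustration, not part of the commit.

// Sketch only: package locations, path, and parameter values are assumed.
import spark.SparkContext
import spark.mllib.util.MLUtils
import spark.mllib.classification.LogisticRegressionLocalRandomSGD

object TrainExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "LogisticRegression")

    // Labels must now be Int (0 or 1); loadLabeledData yields Double labels,
    // so they are converted, mirroring the updated main() methods in the diff.
    val data = MLUtils.loadLabeledData(sc, "path/to/labeled_data.txt")
      .map(yx => (yx._1.toInt, yx._2))

    // train(input, numIterations, stepSize, miniBatchFraction)
    val model = LogisticRegressionLocalRandomSGD.train(data, 20, 1.0, 1.0)
    sc.stop()
  }
}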
@@ -86,19 +86,19 @@ class LogisticRegressionLocalRandomSGD private (var stepSize: Double, var miniBa
     this
   }
 
-  def train(input: RDD[(Double, Array[Double])]): LogisticRegressionModel = {
+  def train(input: RDD[(Int, Array[Double])]): LogisticRegressionModel = {
     val nfeatures: Int = input.take(1)(0)._2.length
     val initialWeights = Array.fill(nfeatures)(1.0)
     train(input, initialWeights)
   }
 
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       initialWeights: Array[Double]): LogisticRegressionModel = {
 
     // Add a extra variable consisting of all 1.0's for the intercept.
     val data = input.map { case (y, features) =>
-      (y, Array(1.0, features:_*))
+      (y.toDouble, Array(1.0, features:_*))
     }
 
     val initalWeightsWithIntercept = Array(1.0, initialWeights:_*)
@@ -141,13 +141,12 @@ object LogisticRegressionLocalRandomSGD {
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
@@ -170,7 +169,7 @@ object LogisticRegressionLocalRandomSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
@@ -192,7 +191,7 @@ object LogisticRegressionLocalRandomSGD {
    * @return a LogisticRegressionModel which has the weights and offset from training.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double
     )
@@ -211,7 +210,7 @@ object LogisticRegressionLocalRandomSGD {
    * @return a LogisticRegressionModel which has the weights and offset from training.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int)
     : LogisticRegressionModel =
   {
@@ -224,7 +223,7 @@ object LogisticRegressionLocalRandomSGD {
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "LogisticRegression")
-    val data = MLUtils.loadLabeledData(sc, args(1))
+    val data = MLUtils.loadLabeledData(sc, args(1)).map(yx => (yx._1.toInt, yx._2))
     val model = LogisticRegressionLocalRandomSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble)
     sc.stop()
@@ -94,19 +94,19 @@ class SVMLocalRandomSGD private (var stepSize: Double, var regParam: Double, var
     this
   }
 
-  def train(input: RDD[(Double, Array[Double])]): SVMModel = {
+  def train(input: RDD[(Int, Array[Double])]): SVMModel = {
     val nfeatures: Int = input.take(1)(0)._2.length
     val initialWeights = Array.fill(nfeatures)(1.0)
     train(input, initialWeights)
   }
 
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       initialWeights: Array[Double]): SVMModel = {
 
     // Add a extra variable consisting of all 1.0's for the intercept.
     val data = input.map { case (y, features) =>
-      (y, Array(1.0, features:_*))
+      (y.toDouble, Array(1.0, features:_*))
     }
 
     val initalWeightsWithIntercept = Array(1.0, initialWeights:_*)
@@ -155,7 +155,7 @@ object SVMLocalRandomSGD {
    *        the number of features in the data.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
       regParam: Double,
@@ -178,7 +178,7 @@ object SVMLocalRandomSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
       regParam: Double,
@@ -200,7 +200,7 @@ object SVMLocalRandomSGD {
    * @return a SVMModel which has the weights and offset from training.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
       regParam: Double)
@@ -219,7 +219,7 @@ object SVMLocalRandomSGD {
    * @return a SVMModel which has the weights and offset from training.
    */
   def train(
-      input: RDD[(Double, Array[Double])],
+      input: RDD[(Int, Array[Double])],
       numIterations: Int)
     : SVMModel =
   {
@@ -232,7 +232,7 @@ object SVMLocalRandomSGD {
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "SVM")
-    val data = MLUtils.loadLabeledData(sc, args(1))
+    val data = MLUtils.loadLabeledData(sc, args(1)).map(yx => (yx._1.toInt, yx._2))
     val model = SVMLocalRandomSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble)
     sc.stop()
@@ -38,7 +38,7 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll {
       offset: Double,
       scale: Double,
       nPoints: Int,
-      seed: Int): Seq[(Double, Array[Double])] = {
+      seed: Int): Seq[(Int, Array[Double])] = {
     val rnd = new Random(seed)
     val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian())
@@ -51,19 +51,19 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll {
     // y <- A + B*x + rLogis()
     // y <- as.numeric(y > 0)
-    val y: Seq[Double] = (0 until nPoints).map { i =>
+    val y: Seq[Int] = (0 until nPoints).map { i =>
       val yVal = offset + scale * x1(i) + rLogis(i)
-      if (yVal > 0) 1.0 else 0.0
+      if (yVal > 0) 1 else 0
     }
 
     val testData = (0 until nPoints).map(i => (y(i), Array(x1(i))))
     testData
   }
 
-  def validatePrediction(predictions: Seq[Double], input: Seq[(Double, Array[Double])]) {
+  def validatePrediction(predictions: Seq[Int], input: Seq[(Int, Array[Double])]) {
     val numOffPredictions = predictions.zip(input).filter { case (prediction, (expected, _)) =>
       // A prediction is off if the prediction is more than 0.5 away from expected value.
-      math.abs(prediction - expected) > 0.5
+      math.abs(prediction.toDouble - expected.toDouble) > 0.5
     }.size
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
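The assertion at the end of the hunk above encodes the test's accuracy requirement: at most 20% of predictions may be "off". A standalone sketch of the same check, with made-up Int labels and predictions that are not part of the commit:

// Made-up labels and predictions, only to illustrate the validatePrediction check.
val expected    = Seq(1, 0, 1, 1, 0, 0, 1, 0, 1, 1)
val predictions = Seq(1, 0, 1, 0, 0, 0, 1, 0, 1, 1)   // one prediction is off

val numOffPredictions = predictions.zip(expected).count { case (prediction, label) =>
  // Same criterion as the suite: off if more than 0.5 away from the expected label.
  math.abs(prediction.toDouble - label.toDouble) > 0.5
}

// At least 80% of the predictions should be on: here 1 < 10 / 5 = 2, so it passes.
assert(numOffPredictions < expected.length / 5)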