I am new to Spark; can you please help with this? The simple pipeline below, which performs a logistic regression, throws an exception. The code:
package pipeline.tutorial.com
import org.apache.log4j.Level
import org.apache.log4j.Logger
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.feature.RFormula
import org.apache.spark.ml.tuning.ParamGridBuilder
import org.apache.spark.sql.SparkSession
import org.apache.spark.ml.tuning.TrainValidationSplit
/**
 * Demo application that tunes and evaluates a logistic-regression pipeline
 * with a train/validation split over a small hyper-parameter grid.
 */
object PipelineDemo {

  /**
   * Loads the sample JSON data, fits the tuned pipeline on a 70% training
   * split and prints the predictions for the remaining 30%.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("PipelineDemo")

    // The session creates (or reuses) the underlying SparkContext itself, so
    // there is no need to instantiate a SparkContext by hand first.
    val spark = SparkSession.builder().config(conf).getOrCreate()

    try {
      val df = spark.read.json("C:/Spark-The-Definitive-Guide-master/data/simple-ml")

      // The formula is intentionally left unset here: it is supplied by the
      // parameter grid below so that several formulas can be compared.
      val rForm = new RFormula()
      val lr = new LogisticRegression().setLabelCol("label").setFeaturesCol("features")
      val pipeline = new Pipeline().setStages(Array(rForm, lr))

      // 2 formulas x 3 elastic-net values x 2 regularisation values = 12 candidates.
      val params = new ParamGridBuilder()
        .addGrid(rForm.formula, Array(
          "lab ~ . + color:value1",
          "lab ~ . + color:value1 + color:value2"))
        .addGrid(lr.elasticNetParam, Array(0.0, 0.5, 1.0))
        .addGrid(lr.regParam, Array(0.1, 2.0))
        .build()

      val evaluator = new BinaryClassificationEvaluator()

      // TrainValidationSplit has no default estimator, evaluator or parameter
      // maps; all three must be wired in explicitly before calling fit().
      val tvs = new TrainValidationSplit()
        .setEstimator(pipeline)
        .setEvaluator(evaluator)
        .setEstimatorParamMaps(params)

      val Array(train, test) = df.randomSplit(Array(0.7, 0.3))
      val model = tvs.fit(train)
      val rs = model.transform(test)
      rs.select("features", "label", "prediction").show()
    } finally {
      // Release local Spark resources even if training or scoring fails.
      spark.stop()
    }
  }
}
// end code.
The code runs fine from the spark-shell, but when I write it as a Spark application (using the Eclipse Scala IDE) it gives the error: Caused by: java.io.NotSerializableException: scala.runtime.LazyRef
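I solved it by removing the Scala library from the build path. To do this, right-click the Scala Library container > Build Path > Remove from Build Path. I am not sure about the root cause, though; presumably the Scala library bundled with the IDE was a different version from the one Spark was built against (scala.runtime.LazyRef only exists from Scala 2.12 onwards), so the two conflicted on the classpath.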