I am new to Akka and Spark Streaming. I am running the example below from GitHub, but the messages sent to the "Helloer" actor are never received and nothing is displayed in the output console.
StreamingApp.scala
import _root_.akka.actor.{ Actor, Props }
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.akka.{ ActorReceiver, AkkaUtils }

class Helloer extends ActorReceiver {
  override def preStart() = {
    println("")
    println("=== Helloer is starting up ===")
    println(s"=== path=${context.self.path} ===")
    println("")
  }

  def receive = {
    // store() method allows us to store the message so Spark Streaming knows about it
    // This is the integration point (from Akka's side) between Spark Streaming and Akka
    case s => store(s)
  }
}

object StreamingApp {
  def main(args: Array[String]) {
    // Configuration for a Spark application.
    // Used to set various Spark parameters as key-value pairs.
    val driverPort = 7777
    val driverHost = "localhost"
    val conf = new SparkConf()
      .setMaster("local[*]") // run locally with as many threads as CPUs
      .setAppName("Spark Streaming with Scala and Akka") // name in web UI
      .set("spark.logConf", "true")
      .set("spark.driver.port", driverPort.toString)
      .set("spark.driver.host", driverHost)
    val ssc = new StreamingContext(conf, Seconds(10))

    val actorName = "helloer"

    // This is the integration point (from Spark's side) between Spark Streaming and the Akka system
    // It's expected that the actor we're now instantiating will `store` messages (to close the integration loop)
    val actorStream = AkkaUtils.createStream[String](ssc, Props[Helloer](), actorName)

    // describe the computation on the input stream as a series of higher-level transformations
    actorStream.reduce(_ + " " + _).print()

    // Custom receiver
    import pl.japila.spark.streaming.CustomReceiverInputDStream
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.dstream.ReceiverInputDStream
    val input: ReceiverInputDStream[String] = ssc.receiverStream[String](CustomReceiverInputDStream(StorageLevel.NONE))
    input.print()

    // Data Ingestion from Kafka
    import org.apache.spark.streaming.kafka._

    // start the streaming context so the data can be processed
    // and the actor gets started
    ssc.start()

    // FIXME wish I knew a better way to handle the asynchrony
    java.util.concurrent.TimeUnit.SECONDS.sleep(3)

    import _root_.akka.actor.ActorSystem
    val actorSystem = ActorSystem("SparkStreamingAkka")
    val url = s"akka.tcp://sparkDriver@$driverHost:$driverPort/user/Supervisor0/$actorName"
    val helloer = actorSystem.actorSelection(url)
    helloer ! "Hello"
    helloer ! "from"
    helloer ! "Spark Streaming"
    helloer ! "with"
    helloer ! "Scala"
    helloer ! "and"
    helloer ! "Akka"

    import java.util.concurrent.TimeUnit.MINUTES
    ssc.awaitTerminationOrTimeout(timeout = MINUTES.toMillis(1))
    ssc.stop(stopSparkContext = true, stopGracefully = true)
  }
}
The program uses a CustomReceiverInputDStream implementation. Below is the custom receiver.
CustomReceiverInputDStream.scala
package pl.japila.spark.streaming

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

case class CustomReceiverInputDStream[T](override val storageLevel: StorageLevel) extends Receiver[T](storageLevel) {
  def onStart() {
    println("\nHello from CustomReceiver.START\n")
  }

  def onStop() {
    println("\nHello from CustomReceiver.STOP\n")
  }
}
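Note that this receiver only prints on start and stop and never calls store(), so the input stream it backs will always be empty. For comparison, a receiver that actually produced records would typically spawn a non-blocking thread in onStart and hand each record to Spark with store(). A minimal, hypothetical sketch (TickReceiver and its one-second tick are illustrative, not part of the original example):

package pl.japila.spark.streaming

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

// Hypothetical receiver: onStart spawns a background thread that
// pushes one record per second into Spark Streaming via store()
class TickReceiver(level: StorageLevel) extends Receiver[String](level) {
  @volatile private var running = true

  def onStart() {
    running = true
    new Thread("tick-receiver") {
      override def run() {
        var n = 0
        while (running && !isStopped()) {
          store(s"tick-$n") // the integration point with Spark Streaming
          n += 1
          Thread.sleep(1000)
        }
      }
    }.start()
  }

  def onStop() {
    running = false // the thread checks this flag and exits
  }
}

It would be wired in exactly like the original receiver: ssc.receiverStream(new TickReceiver(StorageLevel.MEMORY_ONLY)).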
Below are the dead-letter messages I am getting in the output:
.
.
.
Hello from CustomReceiver.START
.
.
.
17/10/10 08:00:05 DEBUG RecurringTimer: Callback for BlockGenerator called at time 1507636805400
[INFO] [10/10/2017 08:00:05.475] [SparkStreamingAkka-akka.actor.default-dispatcher-6] [akka://SparkStreamingAkka/deadLetters] Message [java.lang.String] from Actor[akka://SparkStreamingAkka/deadLetters] to Actor[akka://SparkStreamingAkka/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[... the same dead-letter INFO line repeats for each of the seven messages sent, with the counter increasing up to [7] dead letters encountered ...]
17/10/10 08:00:05 DEBUG RecurringTimer: Callback for BlockGenerator called at time 1507636805600
17/10/10 08:00:05 DEBUG RecurringTimer: Callback for BlockGenerator called at time 1507636805600
[INFO] [10/10/2017 08:00:05.693] [Executor task launch worker-0] [Remoting] Remoting started; listening on addresses :[akka.tcp://streaming-actor-system-0@<host>:2552]
[INFO] [10/10/2017 08:00:05.696] [Executor task launch worker-0] [Remoting] Remoting now listens on addresses: [akka.tcp://streaming-actor-system-0@<host>:2552]
17/10/10 08:00:05 INFO ActorReceiverSupervisor: Supervision tree for receivers initialized at:akka://streaming-actor-system-0/user/Supervisor0
17/10/10 08:00:05 INFO ReceiverSupervisorImpl: Called receiver 0 onStart
17/10/10 08:00:05 INFO ReceiverSupervisorImpl: Waiting for receiver to be stopped
17/10/10 08:00:05 INFO ActorReceiverSupervisor: Started receiver worker at:akka://streaming-actor-system-0/user/Supervisor0/helloer
=== Helloer is starting up ===
=== path=akka://streaming-actor-system-0/user/Supervisor0/helloer ===
17/10/10 08:00:05 DEBUG RecurringTimer: Callback for BlockGenerator called at time 1507636805800
17/10/10 08:00:05 DEBUG RecurringTimer: Callback for BlockGenerator called at time 1507636805800
17/10/10 08:00:06 DEBUG RecurringTimer: Callback for BlockGenerator called at time 1507636806000
.
.
.
OK, I see. The problem here is that the actor created to act as the source, "helloer", is started in a different ActorSystem: the startup log above shows its path as akka://streaming-actor-system-0/user/Supervisor0/helloer, i.e. it lives in the streaming-actor-system-0 system that the receiver supervision tree creates. The code then tries to find it from another ActorSystem, the one named "SparkStreamingAkka", via akka.remote, which is why a complete akka.tcp URL is used. As written, that lookup does not work: the URL points at a sparkDriver system in which the actor does not exist, so every message ends up in dead letters. Making the remote lookup work would need further investigation. However, it is not mandatory to use a different ActorSystem in this example. A workaround is to share a single ActorSystem between Spark Streaming and the sender:
import _root_.akka.actor.{Actor, Props}
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.akka.{ActorReceiver, AkkaUtils}

class Helloer extends ActorReceiver {
  override def preStart() = {
    println("")
    println("=== Helloer is starting up ===")
    println(s"=== path=${context.self.path} ===")
    println("")
  }

  def receive = {
    // store() method allows us to store the message so Spark Streaming knows about it
    // This is the integration point (from Akka's side) between Spark Streaming and Akka
    case s => store(s)
  }
}

// Create a common actor system shared by Spark Streaming and the sender
object CreateActorSystem {
  lazy val as = _root_.akka.actor.ActorSystem("ActorSystemSpark")
}

object StreamingApp {
  def main(args: Array[String]) {
    // Configuration for a Spark application.
    // Used to set various Spark parameters as key-value pairs.
    val driverPort = 7777
    val driverHost = "localhost"
    val conf = new SparkConf()
      .setMaster("local[*]") // run locally with as many threads as CPUs
      .setAppName("Spark Streaming with Scala and Akka") // name in web UI
      .set("spark.logConf", "true")
      .set("spark.driver.port", driverPort.toString)
      .set("spark.driver.host", driverHost)
    val ssc = new StreamingContext(conf, Seconds(10))

    val actorName = "helloer"

    // This is the integration point (from Spark's side) between Spark Streaming and the Akka system
    // It's expected that the actor we're now instantiating will `store` messages (to close the integration loop)
    // Pass the shared actor system as a parameter
    val actorStream = AkkaUtils.createStream[String](ssc, Props[Helloer](), actorName, actorSystemCreator = () => CreateActorSystem.as)

    // describe the computation on the input stream as a series of higher-level transformations
    actorStream.reduce(_ + " " + _).print()

    // Custom receiver
    import pl.japila.spark.streaming.CustomReceiverInputDStream
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.dstream.ReceiverInputDStream
    val input: ReceiverInputDStream[String] = ssc.receiverStream[String](CustomReceiverInputDStream(StorageLevel.NONE))
    input.print()

    // Data Ingestion from Kafka
    //import org.apache.spark.streaming.kafka._

    // start the streaming context so the data can be processed
    // and the actor gets started
    ssc.start()

    // FIXME wish I knew a better way to handle the asynchrony
    java.util.concurrent.TimeUnit.SECONDS.sleep(3)

    val actorSystem = CreateActorSystem.as
    // Get the actor by its local path. There is no need for akka.remote
    val helloer = actorSystem.actorSelection("/user/Supervisor0/helloer")
    helloer ! "Hello"
    helloer ! "from"
    helloer ! "Spark Streaming"
    helloer ! "with"
    helloer ! "Scala"
    helloer ! "and"
    helloer ! "Akka"

    import java.util.concurrent.TimeUnit.MINUTES
    ssc.awaitTerminationOrTimeout(timeout = MINUTES.toMillis(1))
    ssc.stop(stopSparkContext = true, stopGracefully = true)
  }
}
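One thing to double-check: AkkaUtils and ActorReceiver live in the external spark-streaming-akka connector (maintained in Apache Bahir for Spark 2.x), not in Spark core. If the build is missing it, here is a build.sbt sketch; the version numbers are assumptions and should be aligned with your Spark installation:

// build.sbt -- versions are illustrative, not prescriptive
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming"      % "2.2.0" % "provided",
  "org.apache.bahir" %% "spark-streaming-akka" % "2.2.0"
)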
The workaround above will work.
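If you do want to keep the original two-ActorSystem approach with a remote lookup, you can at least make the failure explicit instead of silently losing messages to dead letters. ActorSelection.resolveOne completes with the ActorRef, or fails with ActorNotFound when nothing is listening at the path, which is exactly the condition behind the dead letters above. A minimal sketch (the timeout values are arbitrary; terminate() assumes Akka 2.4+):

import scala.concurrent.Await
import scala.concurrent.duration._
import _root_.akka.actor.{ActorNotFound, ActorSystem}

object CheckSelection {
  def main(args: Array[String]): Unit = {
    val system = ActorSystem("SparkStreamingAkka")
    // The same URL the original code used for the remote lookup
    val url = "akka.tcp://sparkDriver@localhost:7777/user/Supervisor0/helloer"
    try {
      // resolveOne fails with ActorNotFound if no actor answers at the path
      val ref = Await.result(system.actorSelection(url).resolveOne(5.seconds), 6.seconds)
      println(s"Resolved: ${ref.path}")
    } catch {
      case _: ActorNotFound => println(s"No actor reachable at $url")
    } finally {
      system.terminate()
    }
  }
}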