I am trying to read data from the Kafka topic and I was able to read it successfully. However, I want to extract data and return it as a Tuple
. So for that, I am trying to perform map
operation but it is not allowing me to perform by saying that cannot resolve overloaded method 'map'
. Below is my code:
package KafkaAsSource
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.datastream.DataStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import java.util.Properties
object ReadAndValidateJSON {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment()
//env.enableCheckpointing(5000)
val properties = new Properties()
properties.setProperty("bootstrap.servers", "localhost:9092")
properties.setProperty("zookeeper.connect", "localhost:2181")
val data:DataStream[String] = getDataFromKafkaTopic(properties,env)
val mappedData: DataStream[jsonData] = data.map(v => v)
.map {
v =>
val id = v["id"]
val category = v["category"]
val eventTime = v["eventTime"]
jsonData(id,category,eventTime)
}
data.print()
env.execute("ReadAndValidateJSON")
}
def getDataFromKafkaTopic(properties: Properties,env:StreamExecutionEnvironment): DataStream[String] = {
val consumer = new FlinkKafkaConsumer[String]("maddy1", new SimpleStringSchema(), properties)
consumer.setStartFromEarliest()
val src: DataStream[String] = env.addSource(consumer)
return src
}
}
Pom.xml
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-core -->
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>${flink-version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>${flink-version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>${flink-version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-core -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>${flink-version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-cassandra_2.11</artifactId>
<version>${flink-version}</version>
</dependency>
</dependencies>
Kafka Topic Data:
{
"id":"7",
"Category":"Flink",
"eventTime":"2021-12-27 20:52:58.708"
}
{
"id":"9",
"Category":"Flink",
"eventTime":"2021-12-27 20:52:58.727"
}
{
"id":"10",
"Category":"Flink",
"eventTime":"2021-12-27 20:52:58.734"
}
Where am I exactly going wrong? Are the dependencies correct? My Flink version is 1.12.2
Try adding
import org.apache.flink.streaming.api.scala._