Search code examples
scalaapache-flinkapache-nifi

Flink to NiFi connector


I need some help with transferring data from the output NiFi port to Flink using Scala code.

I'm stuck at .addSource() function. It asks for additional parameters ([OUT]) but when I provide them I keep getting an error. Scala code and the error message are below.

package flinkTest

import java.nio.charset.{Charset, StandardCharsets}

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.DataStream
import org.apache.flink.streaming.connectors.nifi.NiFiSource
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.connectors.nifi.NiFiDataPacket

import org.apache.nifi.remote.client.{SiteToSiteClient, SiteToSiteClientConfig}

object NifiFlow {
  def main(): Unit = {

    // get the execution environment
    val env: StreamExecutionEnvironment = 
    StreamExecutionEnvironment.getExecutionEnvironment

    // get input data by connecting to NiFi
    val clientConfig: SiteToSiteClientConfig = new SiteToSiteClient.Builder()
      .url("http://localhost:8080/nifi")
      .portName("Data to flink")
      .requestBatchCount(2)
      .buildConfig()

    val nifiSource: SourceFunction[NiFiDataPacket] = new NiFiSource(clientConfig)

Here's the piece

    val streamSource: DataStream[NiFiDataPacket] = 
    env.addSource(nifiSource).setParallelism(2)

and some more code

    val dataStream = streamSource.map(dataPacket => new String(dataPacket.getContent, StandardCharsets.UTF_8))

    dataStream.print()

    env.execute()
  }
}

1) With [OUT]

Error:(28, 76) value nifiSource of type org.apache.flink.streaming.api.functions.source.SourceFunction[org.apache.flink.streaming.connectors.nifi.NiFiDataPacket] does not take type parameters.
    val streamSource: DataStream[NiFiDataPacket] = env.addSource(nifiSource[NiFiDataPacket]).setParallelism(2)

2) Without [OUT]

Error:(28, 66) type mismatch;
 found   : org.apache.flink.streaming.api.functions.source.SourceFunction[org.apache.flink.streaming.connectors.nifi.NiFiDataPacket]
 required: org.apache.flink.streaming.api.function.source.SourceFunction[?]
    val streamSource: DataStream[NiFiDataPacket] = env.addSource(nifiSource).setParallelism(2)

Example was taken here and rewritten to Scala.

I will appreciate any advice.

UPD2

package flinkTest

import org.apache.nifi.remote.client.{SiteToSiteClient, SiteToSiteClientConfig}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.nifi._

object NifiFlow {
  def main(): Unit = {

    // get the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // get input data by connecting to NiFi
    val clientConfig: SiteToSiteClientConfig = new SiteToSiteClient.Builder()
      .url("http://localhost:8080/nifi")
      .portName("Data to flink")
      .requestBatchCount(2)
      .buildConfig()

    val nifiSource = new NiFiSource(clientConfig)

    val streamSource: DataStream[String] = env
      .addSource(nifiSource)
      .map(x => x.getAttributes().toString)

    env.execute()
  }
}

ERROR

Connected to the target VM, address: '127.0.0.1:41218', transport: 'socket'
Exception in thread "main" org.apache.flink.api.common.functions.InvalidTypesException: Interfaces and abstract classes are not valid types: interface org.apache.flink.streaming.connectors.nifi.NiFiDataPacket
    at org.apache.flink.api.java.typeutils.TypeExtractor.privateGetForClass(TypeExtractor.java:871)
    at org.apache.flink.api.java.typeutils.TypeExtractor.privateGetForClass(TypeExtractor.java:863)
    at org.apache.flink.api.java.typeutils.TypeExtractor.createTypeInfoWithTypeHierarchy(TypeExtractor.java:406)
    at org.apache.flink.api.java.typeutils.TypeExtractor.privateCreateTypeInfo(TypeExtractor.java:197)
    at org.apache.flink.api.java.typeutils.TypeExtractor.createTypeInfo(TypeExtractor.java:184)
    at flinkTest.NifiFlow$.main(NiFiFlow.scala:23)

Solution

  • there is a special implementation of execution environment for scala

    org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

    just use it instead of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment