Search code examples
apache-flink flink-sql

NoClassDefFoundError when submitting an Apache Flink program packaged with the Maven Shade plugin


I need to submit a Flink SQL program to run on YARN.

I packaged it according to the flink official website https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/connectors/table/overview/

It works normally when I add the dependencies to /lib under the Flink directory. But if I try to load the dependencies from the jar package instead of from /lib, a java.lang.NoClassDefFoundError occurs when I execute flink run.

This is my pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  

    <!-- modelVersion is mandatory; 4.0.0 is the POM model version declared by the
         schema referenced on the root element. -->
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. The artifactId matches the FlinkTest-1.0-SNAPSHOT.jar
         that this build produces (default final name is artifactId-version). -->
    <groupId>org.example</groupId>
    <artifactId>FlinkTest</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- Versions shared by the dependency and plugin declarations below. -->
    <properties>
        <!-- Scala: binary suffix used in artifact ids, and the full compiler version. -->
        <scala.binary.version>2.11</scala.binary.version>
        <scala.version>2.11.12</scala.version>
        <!-- Framework versions. -->
        <flink.version>1.14.0</flink.version>
        <hive.version>3.1.1</hive.version>
    </properties>
    <!-- Dependency scopes decide what ends up in the shaded jar: "provided" and
         "system" artifacts are NOT bundled; artifacts without a scope (compile) are.
         Declaration order is kept as-is because it influences classpath order. -->
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- NOTE(review): with "provided" the Kafka connector is not shaded into the
             uber jar and must be supplied by the cluster (e.g. flink/lib) together
             with kafka-clients. A connector loaded from flink/lib cannot see a
             kafka-clients copy that exists only inside this jar. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- Table/SQL planner -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- Compile scope (scope element commented out): bundled into the shaded jar. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.1</version>
            <!--            <scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- Hive dependencies -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
            <scope>provided</scope>
        </dependency>
        <!--        <exclusions>-->
        <!--            <exclusion>-->
        <!--                <groupId>org.apache.hive</groupId>-->
        <!--                <artifactId>hive-common</artifactId>-->
        <!--            </exclusion>-->
        <!--        </exclusions>-->
        <!-- Scala suffix taken from the shared property, like the other Flink artifacts. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!--        <dependency>-->
        <!--            <groupId>org.apache.flink</groupId>-->
        <!--            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>-->
        <!--            <version>${flink.version}</version>-->
        <!--        </dependency>-->
        <!-- NOTE(review): "provided", so fastjson must be on the cluster classpath
             if the job uses it at runtime; confirm. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.76</version>
            <scope>provided</scope>
        </dependency>
        <!-- Compile scope: bundled into the shaded jar. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.6.3</version>
        </dependency>
        <!-- Table formats -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!--        <dependency>-->
        <!--            <groupId>org.apache.hadoop</groupId>-->
        <!--            <artifactId>hadoop-common</artifactId>-->
        <!--            <version>3.1.1</version>-->
        <!--            <scope>provided</scope>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.apache.flink</groupId>-->
        <!--            <artifactId>flink-table-planner-blink_2.12</artifactId>-->
        <!--            <version>${flink.version}</version>-->
        <!--            <scope>provided</scope>-->
        <!--        </dependency>-->

        <!--        <dependency>-->
        <!--            <groupId>org.apache.hadoop</groupId>-->
        <!--            <artifactId>hadoop-hdfs-client</artifactId>-->
        <!--            <version>3.1.1</version>-->
        <!--        </dependency>-->
        <!--        <dependency>-->
        <!--            <groupId>org.apache.hadoop</groupId>-->
        <!--            <artifactId>hadoop-client-api</artifactId>-->
        <!--            <version>3.1.1</version>-->
        <!--        </dependency>-->
        <!-- JDK 8 tools.jar, resolved relative to the running JDK instead of a
             machine-specific absolute path. On JDK 8, java.home points at the jre
             directory, so ../lib/tools.jar is the JDK's own lib/tools.jar.
             NOTE(review): presumably needed by a transitive Hive/Hadoop dependency;
             confirm it is still required. -->
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${java.home}/../lib/tools.jar</systemPath>
        </dependency>
        <!--        <dependency>-->
        <!--            <groupId>org.apache.flink</groupId>-->
        <!--            <artifactId>flink-clients_${scala.binary.version}</artifactId>-->
        <!--            <version>${flink.version}</version>-->
        <!--            <scope>provided</scope>-->
        <!--        </dependency>-->
    </dependencies>
    <build>
        <plugins>
            <!-- Scala compilation for main and test sources. -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>4.4.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <scalaVersion>${scala.version}</scalaVersion>
                            <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
                            <args>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <!-- Builds the uber jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <id>shade</id>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <!-- Artifacts that are never bundled into the shaded jar. -->
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <!-- Drop signature files of every bundled artifact. -->
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <!-- Merge META-INF/services entries and set the manifest main class. -->
                            <transformers combine.children="append">
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.ghost.executable.hive.FlinkPlaySubmit</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

I ran mvn clean and mvn package to build the jars (FlinkTest-1.0-SNAPSHOT.jar and original-FlinkTest-1.0-SNAPSHOT.jar), and then executed:

flink run -t yarn-per-job -D yarn.application.name=flink_test -detached -c com.ghost.executable.hive.FlinkPlaySubmit FlinkTest-1.0-SNAPSHOT.jar

The following exception occurred

java.lang.NoClassDefFoundError: org/apache/kafka/clients/consumer/OffsetResetStrategy

I checked that this class is present in the jar package, but the Flink program can't find it.

What else do I need to do to package and run the program correctly?

Thanks!

This is my flink/lib

-rw-r--r--  1 ghost  staff     167761 12  5 21:43 antlr-runtime-3.5.2.jar
-rw-r--r--  1 ghost  staff    7685322 12  5 21:29 flink-connector-hive_2.11-1.14.0.jar
-rw-r--r--  1 ghost  staff     388181 12  5 23:01 flink-connector-kafka_2.11-1.14.0.jar
-rw-r--r--  1 ghost  staff      85588  9 22 21:37 flink-csv-1.14.0.jar
-rw-r--r--  1 ghost  staff  143645853  9 22 21:40 flink-dist_2.11-1.14.0.jar
-rw-r--r--  1 ghost  staff     153148  9 22 21:36 flink-json-1.14.0.jar
-rw-rw-r--  1 ghost  staff    7709731  9  1 18:31 flink-shaded-zookeeper-3.4.14.jar
-rw-r--r--  1 ghost  staff   42286825  9 22 21:39 flink-table_2.11-1.14.0.jar
-rw-r--r--  1 ghost  staff    1654887 12 10 18:31 hadoop-mapreduce-client-core-3.1.1.jar.bak
-rw-r--r--  1 ghost  staff   40605995 12  5 21:43 hive-exec-3.1.1.jar
-rw-r--r--  1 ghost  staff    3535156 12 10 16:22 kafka-clients-2.6.3.jar.bak
-rw-r--r--  1 ghost  staff     313702 12  5 21:43 libfb303-0.9.3.jar
-rw-rw-r--  1 ghost  staff     206756  9  1 18:28 log4j-1.2-api-2.14.1.jar
-rw-rw-r--  1 ghost  staff     300365  9  1 18:28 log4j-api-2.14.1.jar
-rw-rw-r--  1 ghost  staff    1745700  9  1 18:28 log4j-core-2.14.1.jar
-rw-rw-r--  1 ghost  staff      23625  9  1 18:28 log4j-slf4j-impl-2.14.1.jar

All dependencies except kafka-clients and hadoop-mapreduce-client-core are provided under flink/lib. I want to load these two dependencies from the jar package, and they are not marked as provided in the pom, but they don't seem to be loaded at runtime.


Solution

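  • In your pom, you have the <scope> set to provided for the flink-connector-kafka_${scala.binary.version} artifact. So the Maven Shade plugin doesn't think it needs to include that jar (and its unique transitive dependencies) in your uber jar. So you either need to ensure you have that jar (and its dependencies) installed on your YARN cluster, or remove the provided scope. Note that the connector and kafka-clients must come from the same place: a connector jar in flink/lib is loaded by Flink's parent classloader, which cannot see a kafka-clients copy that exists only inside your uber jar, and that produces exactly this NoClassDefFoundError.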