Search code examples
scala, cassandra, sbt, spark-cassandra-connector, sbt-assembly

Spark-Cassandra-Connector sbt assembly error: "deduplicate: different file contents found in the following"


I have a problem running sbt/sbt assembly inside spark-cassandra-connector:

java.lang.RuntimeException: deduplicate: different file contents found in the following:
    /root/.ivy2/cache/org.apache.spark/spark-network-common_2.10/jars/spark-network-common_2.10-1.6.0-rc2.jar:META-INF/maven/com.google.guava/guava/pom.properties
    /root/.ivy2/cache/com.google.guava/guava/bundles/guava-16.0.1.jar:META-INF/maven/com.google.guava/guava/pom.properties
            at sbtassembly.Assembly$.sbtassembly$Assembly$$applyStrategy$1(Assembly.scala:106)
            at sbtassembly.Assembly$$anonfun$13.apply(Assembly.scala:123)
            at sbtassembly.Assembly$$anonfun$13.apply(Assembly.scala:120)
            at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
            at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
            at scala.collection.Iterator$class.foreach(Iterator.scala:727)
            at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
            at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
            at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
            at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
            at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
            at sbtassembly.Assembly$.applyStrategies(Assembly.scala:125)
            at sbtassembly.Assembly$.x$1$lzycompute$1(Assembly.scala:25)
            at sbtassembly.Assembly$.x$1$1(Assembly.scala:23)
            at sbtassembly.Assembly$.stratMapping$lzycompute$1(Assembly.scala:23)
            at sbtassembly.Assembly$.stratMapping$1(Assembly.scala:23)
            at sbtassembly.Assembly$.inputs$lzycompute$1(Assembly.scala:67)
            at sbtassembly.Assembly$.inputs$1(Assembly.scala:57)
            at sbtassembly.Assembly$.apply(Assembly.scala:83)

I think I may have found the solution:

// Regex used below as an extractor: it matches a whole path string that starts
// with "META", then any single character (the `.` is unescaped), then "INF",
// followed by anything (e.g. "META-INF/...").
val meta = """META.INF(.)*""".r

// Extends the existing assembly merge strategy: the cases below are tried in
// order, and every other path falls through to the previous strategy (`old`).
mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) =>
  {
    // Anything under javax/servlet: resolve conflicts with MergeStrategy.first.
    case PathList("javax", "servlet", xs @ _*)         => MergeStrategy.first
    // Any path whose last segment ends with ".html".
    case PathList(ps @ _*) if ps.last endsWith ".html" => MergeStrategy.first
    // Conflicting application.conf files are concatenated.
    case "application.conf" => MergeStrategy.concat
    // Every path matched by `meta` is discarded from the assembly.
    case meta(_) => MergeStrategy.discard
    // No rule above matched: delegate to the previous strategy.
    case x => old(x)
  }
}

But can anyone tell me where to put this code?


Solution

  • Fix for sbt assembly

    lazy val sbtAssemblySettings = assemblySettings ++ Seq(
             parallelExecution in assembly := false,
        -    jarName in assembly <<= (baseDirectory, version) map { (dir, version) => s"${dir.name}-assembly-$version.jar" },
        +    assemblyJarName in assembly <<= (baseDirectory, version) map { (dir, version) => s"${dir.name}-assembly-$version.jar" },
             run in Compile <<= Defaults.runTask(fullClasspath in Compile, mainClass in (Compile, run), runner in (Compile, run)),
             assemblyOption in assembly ~= { _.copy(includeScala = false) },
             assemblyMergeStrategy in assembly <<= (assemblyMergeStrategy in assembly) {
               (old) => {
        -        case PathList("META-INF", "io.netty.versions.properties", xs @ _*) => MergeStrategy.last
        +        case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard
        +        case PathList("META-INF", xs @ _*) => MergeStrategy.last
                 case PathList("com", "google", xs @ _*) => MergeStrategy.last
        -        case PathList("META-INF", "io.netty.versions.properties") => MergeStrategy.last
        +        case PathList("com", "esotericsoftware", "minlog", xs @ _ *) => MergeStrategy.last
        +        case PathList("io", "netty", xs @ _*) => MergeStrategy.last
        +        case PathList("javax", "xml", xs @ _*) => MergeStrategy.last
        +        case PathList("org", "apache", "commons", xs @ _ *) => MergeStrategy.last
        +        case PathList("org", "apache", "hadoop", "yarn", xs @ _ *) => MergeStrategy.last
        +        case PathList("org", "apache", "spark", xs @ _ *) => MergeStrategy.last
        +        case PathList("org", "fusesource", xs @ _ *) => MergeStrategy.last
                 case x => old(x)
               }