Search code examples
apache-sparkcassandraspark-cassandra-connector

Spark-Cassandra Connector Always Defaulting to 127.0.1.1


The Spark connector to Cassandra trying to connect to 127.0.1.1:9042, even though I am hardcoding the address. Even hardcoding the address conf.set("cassandra.connection.host", "37.61.205.66"), does not work. I do not want Cassandra CQL port to run on 127.0.1.1. What are the solutions.

POM.xml:

<dependencies>
        <!-- Scala and Spark dependencies -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>1.6.0</version>
        </dependency>
        <dependency>
            <groupId>com.datastax.spark</groupId>
            <artifactId>spark-cassandra-connector_2.10</artifactId>
            <version>1.5.0-RC1</version>
        </dependency>
                <dependency>
            <groupId>com.datastax.spark</groupId>
            <artifactId>spark-cassandra-connector-java_2.10</artifactId>
            <version>1.5.0-RC1</version>
        </dependency>
                <dependency>
            <groupId>com.datastax.cassandra</groupId>
            <artifactId>cassandra-driver-core</artifactId>
            <version>3.0.0-rc1</version>
        </dependency>
        <dependency>

Error:

Exception in thread "main" java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.worker.DriverWrapper$.main(DriverWrapper.scala:58)
    at org.apache.spark.deploy.worker.DriverWrapper.main(DriverWrapper.scala)
Caused by: java.io.IOException: Failed to open native connection to Cassandra at {127.0.1.1}:9042
    at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:163)
    at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$3.apply(CassandraConnector.scala:149)
    at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$3.apply(CassandraConnector.scala:149)
    at com.datastax.spark.connector.cql.RefCountedCache.createNewValueAndKeys(RefCountedCache.scala:31)
    at com.datastax.spark.connector.cql.RefCountedCache.acquire(RefCountedCache.scala:56)
    at com.datastax.spark.connector.cql.CassandraConnector.openSession(CassandraConnector.scala:82)
    at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:110)
    at com.datastax.spark.connector.cql.CassandraConnector.withClusterDo(CassandraConnector.scala:121)
    at com.datastax.spark.connector.cql.Schema$.fromCassandra(Schema.scala:322)
    at com.datastax.spark.connector.cql.Schema$.tableFromCassandra(Schema.scala:342)
    at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.tableDef(CassandraTableRowReaderProvider.scala:50)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef$lzycompute(CassandraTableScanRDD.scala:60)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef(CassandraTableScanRDD.scala:60)
    at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.verify(CassandraTableRowReaderProvider.scala:137)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.verify(CassandraTableScanRDD.scala:60)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.getPartitions(CassandraTableScanRDD.scala:232)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
    at org.apache.spark.rdd.RDD$$anonfun$distinct$2.apply(RDD.scala:401)
    at org.apache.spark.rdd.RDD$$anonfun$distinct$2.apply(RDD.scala:401)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
    at org.apache.spark.rdd.RDD.distinct(RDD.scala:400)
    at com.access_company.twine.OnlineGatewayCount$.main(OnlineGatewayCount.scala:93)
    at com.access_company.twine.OnlineGatewayCount.main(OnlineGatewayCount.scala)
    ... 6 more
Caused by: com.datastax.driver.core.exceptions.NoHostAvailableException: All host(s) tried for query failed (tried: /127.0.1.1:9042 (com.datastax.driver.core.exceptions.TransportException: [/127.0.1.1] Cannot connect))
    at com.datastax.driver.core.ControlConnection.reconnectInternal(ControlConnection.java:233)
    at com.datastax.driver.core.ControlConnection.connect(ControlConnection.java:79)
    at com.datastax.driver.core.Cluster$Manager.init(Cluster.java:1424)
    at com.datastax.driver.core.Cluster.getMetadata(Cluster.java:403)
    at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:156)

Solution

  • The proper setting is prefixed with 'spark'. Please see the docs.

    conf.set("spark.cassandra.connection.host", cassandraHost)