I'm new to Spark, and am learning it on the Cloudera Distr for Hadoop (CDH). I'm trying to execute the PageRank and BFS functions through Jupyter Notebook, which was initiated using the following command:
pyspark --packages graphframes:graphframes:0.1.0-spark1.6,com.databricks:spark-csv_2.11:1.2.0
The below is the PageRank function command I tried to run, along with the error message:
ranks = tripGraph.pageRank(resetProbability=0.15, maxIter=5)
Output:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-20-34d549cc033e> in <module>()
----> 1 ranks = tripGraph.pageRank(resetProbability=0.15, maxIter=5)
2 ranks.vertices.orderBy(ranks.vertices.pagerank.desc()).limit(20).show()
/tmp/spark-3bdc323d-a439-4f0a-ac1d-4e64ef4d1396/userFiles-0c248c5c-29fc-44c7-bfd9-3543500350dc/graphframes_graphframes-0.1.0-spark1.6.jar/graphframes/graphframe.pyc in pageRank(self, resetProbability, sourceId, maxIter, tol)
/usr/lib/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
811 answer = self.gateway_client.send_command(command)
812 return_value = get_return_value(
--> 813 answer, self.gateway_client, self.target_id, self.name)
814
815 for temp_arg in temp_args:
/usr/lib/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
43 def deco(*a, **kw):
44 try:
---> 45 return f(*a, **kw)
46 except py4j.protocol.Py4JJavaError as e:
47 s = e.java_exception.toString()
/usr/lib/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
306 raise Py4JJavaError(
307 "An error occurred while calling {0}{1}{2}.\n".
--> 308 format(target_id, ".", name), value)
309 else:
310 raise Py4JError(
Py4JJavaError: An error occurred while calling o106.run.
: java.lang.AbstractMethodError
at org.apache.spark.Logging$class.log(Logging.scala:50)
at org.apache.spark.graphx.lib.backport.PageRank$.log(PageRank.scala:65)
at org.apache.spark.Logging$class.logInfo(Logging.scala:58)
at org.apache.spark.graphx.lib.backport.PageRank$.logInfo(PageRank.scala:65)
at org.apache.spark.graphx.lib.backport.PageRank$.runWithOptions(PageRank.scala:148)
at org.graphframes.lib.PageRank$.run(PageRank.scala:130)
at org.graphframes.lib.PageRank.run(PageRank.scala:104)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:209)
at java.lang.Thread.run(Thread.java:745)
I'm getting the same error messages for the BFS function I'm trying:
filteredPaths = tripGraph.bfs(
fromExpr = "id = 'SEA'",
toExpr = "id = 'SFO'",
maxPathLength = 1)
Output:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-22-74394b11f50d> in <module>()
4 fromExpr = "id = 'SEA'",
5 toExpr = "id = 'SFO'",
----> 6 maxPathLength = 1)
7
8 filteredPaths.show()
/tmp/spark-3bdc323d-a439-4f0a-ac1d-4e64ef4d1396/userFiles-0c248c5c-29fc-44c7-bfd9-3543500350dc/graphframes_graphframes-0.1.0-spark1.6.jar/graphframes/graphframe.pyc in bfs(self, fromExpr, toExpr, edgeFilter, maxPathLength)
/usr/lib/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
811 answer = self.gateway_client.send_command(command)
812 return_value = get_return_value(
--> 813 answer, self.gateway_client, self.target_id, self.name)
814
815 for temp_arg in temp_args:
/usr/lib/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
43 def deco(*a, **kw):
44 try:
---> 45 return f(*a, **kw)
46 except py4j.protocol.Py4JJavaError as e:
47 s = e.java_exception.toString()
/usr/lib/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
306 raise Py4JJavaError(
307 "An error occurred while calling {0}{1}{2}.\n".
--> 308 format(target_id, ".", name), value)
309 else:
310 raise Py4JError(
Py4JJavaError: An error occurred while calling o147.run.
: java.lang.AbstractMethodError
at org.apache.spark.Logging$class.log(Logging.scala:50)
at org.graphframes.lib.BFS$.log(BFS.scala:131)
at org.apache.spark.Logging$class.logInfo(Logging.scala:58)
at org.graphframes.lib.BFS$.logInfo(BFS.scala:131)
at org.graphframes.lib.BFS$.org$graphframes$lib$BFS$$run(BFS.scala:212)
at org.graphframes.lib.BFS.run(BFS.scala:126)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:209)
at java.lang.Thread.run(Thread.java:745)
Can you please let me know the issue?
Thanks, Sasi.
You are using incompatible Scala versions:
graphframes:graphframes:0.1.0-spark1.6
- Scala 2.10com.databricks:spark-csv_2.11:1.2.0
- Scala 2.11You have to use the same Scala version for all components (com.databricks:spark-csv_2.10:1.2.0
if Spark is compiled with Scala 2.10). Please consult Resolving dependency problems in Apache Spark for details.