Getting strange NPE when trying to read s3 with Scalding / Hadoop. The paths are 100% correct.
Asking this question because it's surprisingly hard to Google and everytime I get this error I forget how I solved it. So posting on SO so I can Google myself.
Caused by: java.lang.NullPointerException
at org.apache.hadoop.fs.s3native.NativeS3FileSystem.listStatus(NativeS3FileSystem.java:479)
at org.apache.hadoop.fs.Globber.listStatus(Globber.java:69)
at org.apache.hadoop.fs.Globber.glob(Globber.java:217)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1623)
at com.twitter.scalding.FileSource.pathIsGood(FileSource.scala:57)
at com.twitter.scalding.FileSource$$anonfun$hdfsReadPathsAreGood$1.apply(FileSource.scala:89)
at com.twitter.scalding.FileSource$$anonfun$hdfsReadPathsAreGood$1.apply(FileSource.scala:89)
at scala.collection.LinearSeqOptimized$class.forall(LinearSeqOptimized.scala:70)
at scala.collection.immutable.List.forall(List.scala:84)
at com.twitter.scalding.FileSource.hdfsReadPathsAreGood(FileSource.scala:89)
at com.twitter.scalding.FileSource.validateTaps(FileSource.scala:101)
at com.twitter.scalding.Job$$anonfun$validateSources$1.apply(Job.scala:158)
at com.twitter.scalding.Job$$anonfun$validateSources$1.apply(Job.scala:153)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at com.twitter.scalding.Job.validateSources(Job.scala:153)
at com.twitter.scalding.Job.buildFlow(Job.scala:91)
at com.twitter.scalding.Job.run(Job.scala:126)
at com.twitter.scalding.Tool.start$1(Tool.scala:109)
at com.twitter.scalding.Tool.run(Tool.scala:125)
at com.twitter.scalding.Tool.run(Tool.scala:72)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at com.twitter.scalding.Tool$.main(Tool.scala:133)
This happens when your s3 credentials are not set correctly. Make sure fs.s3n.awsAccessKeyId
and fs.s3n.awsSecretAccessKey
are set, and correct.