Search code examples
apache-sparkrdfjenasansa

Error with SANSA unable to create RDD from NT file


Unable to Create RDD[Triple] using

sparkSession.rdf(Lang.NTRIPLES)(path)

Used to working without issue with Java 11, and Spark 2.4.x

Not working ,throwing error when using Java 8 and Spark 3.0

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 28499
    at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:532)
    at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:315)
    at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:102)
    at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:76)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:45)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:59)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:59)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:292)
    at scala.collection.Iterator.foreach(Iterator.scala:943)
    at scala.collection.Iterator.foreach$(Iterator.scala:943)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
    at scala.collection.IterableLike.foreach(IterableLike.scala:74)
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:292)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:289)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:59)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:181)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:285)
    at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
    at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
    at scala.collection.TraversableLike.map(TraversableLike.scala:285)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:278)
    at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:175)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:174)
    at scala.collection.immutable.List.flatMap(List.scala:366)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:174)
    at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:21)
    at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:29)
    at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:77)
    at com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:490)
    at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:380)
    at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:308)
    at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueAccessor(POJOPropertiesCollector.java:196)
    at com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueAccessor(BasicBeanDescription.java:252)
    at com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:346)
    at com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:216)
    at com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:165)
    at com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1388)
    at com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1336)
    at com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:510)
    at com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:713)
    at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:308)
    at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:4094)
    at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:3404)
    at org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:145)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
    at org.apache.spark.SparkContext.makeRDD(SparkContext.scala:855)
    at com.xx.yy.catalog._CatalogDataBuilder.fromTriples(CatalogDataBuilder.scala:433)
    ***
    ***
    at com.xx.yy.example.TestExample.main(TestExample.scala)

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 28499

Solution

  • I also had a java.lang.ArrayIndexOutOfBoundsException: 28499, very similar, after migrating to Spark 3.0.1 from 2.4.3, when performing a count, countApprox or rdd operation on Spark datasets.

    For me, this solution worked:

    https://programmersought.com/article/35311239379/

    Basically I added this dependency:

       <dependency>
            <groupId>com.thoughtworks.paranamer</groupId>
            <artifactId>paranamer</artifactId>
            <version>2.8</version>
        </dependency>