Search code examples
nlp rdf jena protege sesame

Use RDF API (Jena, OpenRDF or Protege) to convert OpenIE outputs


I was recommended to use one of the APIs (Jena, OpenRDF or Protege) to convert the outputs that I generated from OpenIE4.1 jar file (downloadable from http://knowitall.github.io/openie/). The following is the sample OpenIE4.1 output format: confidence score followed by subject, predicate, object triplet

    The rail launchers are conceptually similar to the underslung SM-1
    0.93 (The rail launchers; are; conceptually similar to the underslung SM-1)

I plan to produce triples that follow the pattern below from the above output. (In fact, hundreds of such outputs have been generated by processing a set of free-text documents; only those with a confidence score greater than a certain value will be processed.)

Given

    subject: The rail launchers
    predicate: are
    object: conceptually similar to the underslung SM-1

(confidence score is ignored)

  1. Create a blank node identifier for each distinct :subject in the file (let’s call it bnode_s)
  2. Create a blank node identifier for each distinct :object in the file (let’s call it bnode_o)
  3. Define a URI for each distinct predicate

BTW, I do have outputs that contain more than three elements, for instance John gave Mary a birthday gift (John; gave; Mary; a birthday gift), which are more complicated to turn into RDF triples.

However, I'm not familiar with any of the APIs mentioned above and don't know what input format each API accepts.


Solution

  • import org.apache.jena.riot.Lang;
    import org.apache.jena.riot.RDFDataMgr;
    
    import com.hp.hpl.jena.rdf.model.Model;
    import com.hp.hpl.jena.rdf.model.ModelFactory;
    import com.hp.hpl.jena.rdf.model.Property;
    import com.hp.hpl.jena.rdf.model.Resource;
    import com.hp.hpl.jena.util.URIref;
    import com.hp.hpl.jena.vocabulary.RDF;
    import com.hp.hpl.jena.vocabulary.RDFS;
    import com.hp.hpl.jena.vocabulary.XSD;
    
    /**
     * Example showing how OpenIE extractions of the form
     * (confidence, subject, predicate, object) can be expressed as RDF with Jena.
     *
     * <p>Each extraction becomes a blank node carrying {@code rdf:subject},
     * {@code rdf:predicate} and {@code rdf:object} properties plus a
     * {@code :confidence} literal. Subject and object are themselves blank nodes
     * labelled via {@code rdfs:label}; the predicate is a URI built from the
     * namespace and the URI-encoded predicate text.
     */
    public class OpenIETripletConversionExample {

        /**
         * Builds the example model from two hard-coded extractions and prints it
         * to standard output as Turtle, RDF/XML and N-Triples, in that order.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            final String namespace = "http://stackoverflow.com/q/24897405/1281433/";

            // Model plus prefix mappings; the prefixes only affect how RDF/XML and TTL are rendered.
            Model rdfModel = ModelFactory.createDefaultModel();
            rdfModel.setNsPrefix( "", namespace );
            rdfModel.setNsPrefix( "rdf", RDF.getURI() );
            rdfModel.setNsPrefix( "xsd", XSD.getURI() );
            rdfModel.setNsPrefix( "rdfs", RDFS.getURI() );

            // Property used to keep the extraction's confidence score (optional information).
            Property confidenceProperty = rdfModel.createProperty( namespace + "confidence" );

            // Sample extractions: { confidence, subject text, predicate text, object text }.
            Object[][] extractions = {
                    { 0.57, "The quick brown fox", "jumped", "over the lazy dog." },
                    { 0.93, "The rail launchers", "are", "conceptually similar to the underslung SM-1." }
            };

            for ( int row = 0; row < extractions.length; row++ ) {
                Object[] fields = extractions[row];
                Object score = fields[0];
                String subjectText = (String) fields[1];
                String predicateText = (String) fields[2];
                String objectText = (String) fields[3];

                // Node standing for the extraction as a whole, then its three parts.
                Resource extractionNode = rdfModel.createResource();
                Resource subjectNode = labelledNode( rdfModel, subjectText );
                Property predicateNode = rdfModel.createProperty( namespace + URIref.encode( predicateText ) );
                Resource objectNode = labelledNode( rdfModel, objectText );

                extractionNode
                        .addLiteral( confidenceProperty, score )
                        .addProperty( RDF.subject, subjectNode )
                        .addProperty( RDF.predicate, predicateNode )
                        .addProperty( RDF.object, objectNode );
            }

            // Print the same model in each serialization, one after another.
            Lang[] outputFormats = { Lang.TTL, Lang.RDFXML, Lang.NTRIPLES };
            for ( Lang format : outputFormats ) {
                RDFDataMgr.write( System.out, rdfModel, format );
            }
        }

        /** Creates a new anonymous resource in {@code target} whose {@code rdfs:label} is {@code text}. */
        private static Resource labelledNode( Model target, String text ) {
            Resource node = target.createResource();
            node.addProperty( RDFS.label, text );
            return node;
        }
    }
    
    @prefix :      <http://stackoverflow.com/q/24897405/1281433/> .
    @prefix rdfs:  <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix xsd:   <http://www.w3.org/2001/XMLSchema#> .
    @prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    
    [ rdf:object     [ rdfs:label  "conceptually similar to the underslung SM-1." ] ;
      rdf:predicate  :are ;
      rdf:subject    [ rdfs:label  "The rail launchers" ] ;
      :confidence    "0.93"^^xsd:double
    ] .
    
    [ rdf:object     [ rdfs:label  "over the lazy dog." ] ;
      rdf:predicate  :jumped ;
      rdf:subject    [ rdfs:label  "The quick brown fox" ] ;
      :confidence    "0.57"^^xsd:double
    ] .
    
    <rdf:RDF
        xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
        xmlns="http://stackoverflow.com/q/24897405/1281433/"
        xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
        xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
      <rdf:Description>
        <rdf:object rdf:parseType="Resource">
          <rdfs:label>conceptually similar to the underslung SM-1.</rdfs:label>
        </rdf:object>
        <rdf:predicate rdf:resource="http://stackoverflow.com/q/24897405/1281433/are"/>
        <rdf:subject rdf:parseType="Resource">
          <rdfs:label>The rail launchers</rdfs:label>
        </rdf:subject>
        <confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#double"
        >0.93</confidence>
      </rdf:Description>
      <rdf:Description>
        <rdf:object rdf:parseType="Resource">
          <rdfs:label>over the lazy dog.</rdfs:label>
        </rdf:object>
        <rdf:predicate rdf:resource="http://stackoverflow.com/q/24897405/1281433/jumped"/>
        <rdf:subject rdf:parseType="Resource">
          <rdfs:label>The quick brown fox</rdfs:label>
        </rdf:subject>
        <confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#double"
        >0.57</confidence>
      </rdf:Description>
    </rdf:RDF>
    
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffe <http://www.w3.org/2000/01/rdf-schema#label> "The quick brown fox" .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffa .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://stackoverflow.com/q/24897405/1281433/are> .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffb .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://stackoverflow.com/q/24897405/1281433/confidence> "0.93"^^<http://www.w3.org/2001/XMLSchema#double> .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffa <http://www.w3.org/2000/01/rdf-schema#label> "conceptually similar to the underslung SM-1." .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffd .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://stackoverflow.com/q/24897405/1281433/jumped> .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffe .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://stackoverflow.com/q/24897405/1281433/confidence> "0.57"^^<http://www.w3.org/2001/XMLSchema#double> .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffd <http://www.w3.org/2000/01/rdf-schema#label> "over the lazy dog." .
    _:BX2D492663e1X3A1475ff7864cX3AX2D7ffb <http://www.w3.org/2000/01/rdf-schema#label> "The rail launchers" .