Search code examples
python-3.x · nlp · stanford-nlp · pycorenlp

How can I extract phrases from CoreNLPParser?


See the screenshot

As you can see from the image, the parser returns NP, VP, PP, and NP. I want to be able to access all phrases at different depths. For instance, at depth=1 there are two phrases, NP and VP; at depth=2 there are some other phrases; and at depth=3 there are others still. How can I access the phrases that belong to depth = n with Python?


Solution

  • package edu.stanford.nlp.examples;
    
    import edu.stanford.nlp.pipeline.*;
    import edu.stanford.nlp.trees.*;
    
    import java.util.*;
    import java.util.stream.*;
    
    /**
     * Runs a CoreNLP pipeline over a fixed example sentence and prints every
     * constituent labeled {@code NP} together with the tokens it covers.
     */
    public class ConstituencyParserExample {

        /**
         * Builds the pipeline, annotates the example text, and prints the NP
         * constituents of each sentence.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            final String text = "The little lamb climbed the big mountain.";

            // The "parse" annotator is what produces the constituency tree read below.
            final Properties props = new Properties();
            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
            final StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            // Wrap the raw text in a document and run every configured annotator on it.
            final CoreDocument document = new CoreDocument(text);
            pipeline.annotate(document);

            // Depth limit handed to Tree.constituents(...); presumably counted from the
            // root of the parse tree -- confirm against the CoreNLP Javadoc.
            final int maxDepth = 5;
            document.sentences().forEach(sentence -> printNounPhrases(sentence, maxDepth));
        }

        /**
         * Prints a separator, the label, and the covered tokens for each NP
         * constituent found in the sentence's constituency parse.
         *
         * @param sentence annotated sentence whose parse is inspected
         * @param maxDepth depth limit forwarded to the constituent lookup
         */
        private static void printNounPhrases(CoreSentence sentence, int maxDepth) {
            Set<Constituent> nounPhrases = sentence.constituencyParse()
                    .constituents(new LabeledScoredConstituentFactory(), maxDepth)
                    .stream()
                    .filter(candidate -> candidate.label().value().equals("NP"))
                    .collect(Collectors.toSet());

            nounPhrases.forEach(phrase -> {
                System.out.println("---");
                System.out.println("label: " + phrase.label().value());
                // subList's upper bound is exclusive; the +1 keeps the token at end() in the output.
                System.out.println(sentence.tokens().subList(phrase.start(), phrase.end() + 1));
            });
        }
    }