I'm using Stanford CoreNLP (SCP) to get the CFG parse tree for English sentences.
# Wildcard import that brings StanfordCoreNLP into scope
# (presumably a Python wrapper around Stanford CoreNLP -- confirm which package).
from corenlp import *
# NOTE: this binds the name `corenlp` to the parser instance.
corenlp = StanfordCoreNLP()
# Parse one sentence; the return value is discarded here.
corenlp.parse("Every cat loves a dog")
My expected output is a tree like this:
(S (NP (DET Every) (NN cat)) (VP (VT loves) (NP (DET a) (NN dog))))
But what I get is:
(ROOT (S (NP (DT Every) (NN cat)) (VP (VBZ loves) (NP (DT a) (NN dog)))))
How can I change the POS tags to the expected ones and remove the ROOT node?
Thanks
You can use the nltk.tree module from NLTK.
from nltk.tree import *
def traverse(t, label_map=None):
    """Recursively rename node labels of a parse tree, in place.

    Args:
        t: A tree node exposing ``label()``, ``set_label()`` and iteration
            over its children (for example an ``nltk.tree.Tree``). Any
            object without a ``label`` method, such as a leaf token string,
            is left untouched.
        label_map: Optional dict mapping an old label to its replacement.
            Defaults to ``{"DT": "DET", "VBZ": "VT"}``.

    Returns:
        None. The tree is modified in place.
    """
    if label_map is None:
        label_map = {"DT": "DET", "VBZ": "VT"}

    # Only the label lookup is guarded: a missing label() is how leaves are
    # detected. Returning here also prevents iterating over a leaf string,
    # which would recurse forever on single-character strings.
    try:
        label = t.label()
    except AttributeError:
        return

    # Replace the label when a mapping exists for it.
    if label in label_map:
        t.set_label(label_map[label])

    for child in t:
        traverse(child, label_map)
# Bracketed parse as returned by the parser, including the ROOT wrapper.
output_tree = "(ROOT (S (NP (DT Every) (NN cat)) (VP (VBZ loves) (NP (DT a) (NN dog)))))"
tree = ParentedTree.fromstring(output_tree)

# Remove the ROOT element by descending to its first child.
if tree.label() == "ROOT":
    tree = tree[0]

# Rename the POS tags in place (DT -> DET, VBZ -> VT).
traverse(tree)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(tree)
# (S (NP (DET Every) (NN cat)) (VP (VT loves) (NP (DET a) (NN dog))))