Search code examples
python-3.x annotations visualization text-parsing datavisualization.toolkit

Visualization of tagged elements (collapsing, expanding, highlighting) in NLP


Hi all NLP researchers,

I have a sentence in which some tokens are tagged with a customized tagset (e.g. SUB, PRD, OBJ, etc.). The offset information of these tags is stored in an XML-style file format (XMI), as shown below.

<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0"/>
<type2:DocumentMetaData xmi:id="1" sofa="12" begin="0" end="28" language="x-unspecified" documentTitle="visualization-example2.txt" documentId="admin" documentUri="file:/C:/Users/Administrator/.webanno/repository/project/1/document/14/source/visualization-example2.txt" collectionId="file:/C:/Users/Administrator/.webanno/repository/project/1/document/14/source/" documentBaseUri="file:/C:/Users/Administrator/.webanno/repository/project/1/document/14/source/" isLastSegment="false"/>
<type4:Sentence xmi:id="19" sofa="12" begin="0" end="28"/>
<type4:Token xmi:id="23" sofa="12" begin="0" end="1"/>
<type4:Token xmi:id="32" sofa="12" begin="2" end="6"/>
<type4:Token xmi:id="41" sofa="12" begin="7" end="8"/>
<type4:Token xmi:id="50" sofa="12" begin="9" end="12"/>
<type4:Token xmi:id="59" sofa="12" begin="13" end="17"/>
<type4:Token xmi:id="68" sofa="12" begin="18" end="22"/>
<type4:Token xmi:id="77" sofa="12" begin="23" end="27"/>
<type4:Token xmi:id="86" sofa="12" begin="27" end="28"/>
<chunk:Chunk xmi:id="95" sofa="12" begin="0" end="1" chunkValue="SUB"/>
<chunk:Chunk xmi:id="100" sofa="12" begin="2" end="28" chunkValue="PRD"/>
<chunk:Chunk xmi:id="105" sofa="12" begin="2" end="6" chunkValue="VERB"/>
<chunk:Chunk xmi:id="110" sofa="12" begin="7" end="27" chunkValue="OBJ"/>
<chunk:Chunk xmi:id="115" sofa="12" begin="7" end="12" chunkValue="HED"/>
<chunk:Chunk xmi:id="120" sofa="12" begin="13" end="27" chunkValue="PP"/>
<type2:TagsetDescription xmi:id="125" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency" name="UD Universal Dependencies"/>
<type2:TagsetDescription xmi:id="132" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity" name="Named Entity tags"/>
<type2:TagsetDescription xmi:id="139" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation" name="Operation"/>
<type2:TagsetDescription xmi:id="146" sofa="12" begin="0" end="0" layer="de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" name="UD Universal POS tags"/>
<cas:Sofa xmi:id="12" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="I want a dog with long hair."/>
<cas:View sofa="12" members="1 19 23 32 41 50 59 68 77 86 95 100 105 110 115 120 125 132 139 146"/></xmi:XMI>
  • What I want to do is to visualize these tags as shown below:

        I     want    a dog   with long hair  .
      |SUB|  |_______________PRD_______________|
             |VERB|  |__________OBJ__________|
                     |_HED_| |______PP_______|

  • Plus, I want to collapse and expand these tags at every level of the hierarchy, e.g. PRD -> VERB OBJ; OBJ -> HED PP.

  • Also, I want to highlight the span of the sentence that a tag covers when the cursor hovers over that tag. (So it would be best if this were shown in a GUI environment.)

Basically, it is a binary tree structure, so I've looked at some related visualization packages in Python such as Dash and Plotly, but they don't seem to suit my particular needs very well.

I’d really appreciate any advice for this task, and any tips would be very helpful for me. Thanks.


Solution

  • You can use the BALKANGraph JavaScript diagramming library to achieve the requested functionality.

    OrgChart JS supports expand/collapse

    I'm not sure exactly what you want to highlight, but in the demo below the hovered node and its parent node are highlighted; you can use it as a starting point to implement your own logic.

    enter image description here

            // Register a custom "sentence" node template: a shallow copy of the
            // built-in "ana" template with its size, two text fields and node
            // rectangle overridden.
            //   size    - 520 x 120 node box
            //   field_0 - 24px text, centred horizontally at x=260, y=90
            //   field_1 - 16px text, right-aligned (anchor "end") at x=500, y=30
            //   node    - rounded rectangle filling the whole 520 x 120 box
            OrgChart.templates.sentence = Object.assign({}, OrgChart.templates.ana, {
                size: [520, 120],
                field_0: '<text class="field_0"  style="font-size: 24px;" fill="#ffffff" x="260" y="90" text-anchor="middle">{val}</text>',
                field_1: '<text class="field_1"  style="font-size: 16px;" fill="#ffffff" x="500" y="30" text-anchor="end">{val}</text>',
                node: '<rect x="0" y="0" height="120" width="520" fill="#039BE5" stroke-width="1" stroke="#aeaeae" rx="5" ry="5"></rect>'
            });
    
    
                // Build the chart inside the #tree container. Each node is one
                // tagged span of the sentence: `type` is the tag name and `text`
                // is the covered text.
                var chart = new OrgChart(document.getElementById("tree"), {
                    // Which node data property feeds which template text field.
                    nodeBinding: {
                        field_0: "type",
                        field_1: "text"
                    },
                    // Use the same `OrgChart` namespace as the template setup and
                    // the constructor instead of the separate `BALKANGraph` global.
                    orientation: OrgChart.orientation.top_left,
                    // Nodes tagged "sentence" are drawn with the custom wide template.
                    tags: {
                        "sentence": {
                            template: "sentence"
                        }
                    },
                    // Child -> parent edges (`from` is the child id, `to` the parent id):
                    //   SENTENCE(1) -> SUB(2), PRD(3)
                    //   PRD(3)      -> VERB(4), OBJ(5)
                    //   OBJ(5)      -> HED(6), PP(7)
                    // NOTE(review): newer OrgChart JS documentation expresses the
                    // hierarchy with a `pid` property on each node rather than a
                    // `links` array - confirm which form the loaded build expects.
                    links: [
                        { from: 2, to: 1 },
                        { from: 3, to: 1 },
                        { from: 4, to: 3 },
                        { from: 5, to: 3 },
                        { from: 6, to: 5 },
                        { from: 7, to: 5 }
                    ],
                    nodes: [
                        { id: 1, text: "I want a dog  with long hair", type:"SENTENCE", tags: ["sentence"] },
                        { id: 2, text: "I", type: "SUB" },
                        { id: 3, text: "want a dog with long hair", type: "PRD" },
                        { id: 4, text: "want", type: "VERB" },
                        { id: 5, text: "a dog  with long hair", type: "OBJ" },
                        { id: 6, text: "a dog", type: "HED" },
                        { id: 7, text: "with long hair", type: "PP"  }
                    ]
                });
    
                // Hover highlighting: while the pointer is over a node, add the
                // "highlight" CSS class to that node's element and to its parent's
                // element; remove it from both when the pointer leaves.
                //
                // "mouseenter"/"mouseleave" are used as a matched, non-bubbling pair,
                // so the handlers fire once per node rather than once for every
                // child SVG element the pointer crosses.
                //
                // NOTE(review): listeners are bound once, to the elements that exist
                // now. If the library re-creates node elements when it redraws
                // (e.g. on expand/collapse), they would need re-binding - confirm.
                Array.prototype.forEach.call(chart.getNodeElements(), function (nodeElement) {
                    // Adds (isOn === true) or removes (isOn === false) the highlight
                    // on this node and, if it has one, on its parent node.
                    function setHighlight(isOn) {
                        var method = isOn ? "add" : "remove";
                        nodeElement.classList[method]("highlight");

                        var node = chart.nodes[nodeElement.getAttribute("node-id")];
                        var parent = node ? node.parent : null;
                        if (parent != null) {
                            var parentElement = chart.getNodeElement(parent.id);
                            // Skip silently if the parent has no rendered element.
                            if (parentElement) {
                                parentElement.classList[method]("highlight");
                            }
                        }
                    }

                    nodeElement.addEventListener("mouseenter", function () {
                        setHighlight(true);
                    });
                    nodeElement.addEventListener("mouseleave", function () {
                        setHighlight(false);
                    });
                });
            
            /* Let the chart fill the whole viewport: no page margins or padding,
               and no scrollbars on the page itself. */
            html, body {
                margin: 0;
                padding: 0;
                width: 100%;
                height: 100%;
                overflow: hidden;
                text-align: center;
                /* Generic fallback so text stays sans-serif where Helvetica
                   is not installed. */
                font-family: Helvetica, sans-serif;
            }

            /* Container element the chart is rendered into. */
            #tree {
                width: 100%;
                height: 100%;
            }

            /* Hover highlight: recolour the rectangle of any element carrying the
               "highlight" class. !important is needed to win over the inline
               fill attribute on the template's <rect>. */
            .highlight rect {
                fill: #F57C00 !important;
            }
    <!-- OrgChart JS library; provides the global OrgChart used by the script above. -->
    <script src="https://balkangraph.com/js/latest/OrgChart.js"></script>

    <!-- Container the chart is rendered into (looked up by id "tree"). -->
    <div id="tree"></div>