Search code examples
xml xslt converters graphml

Convert a graphML file into another graphML using XSL?


I have a simple GraphML file composed of 3 nodes and 2 connections, and I would like to convert it so that the internal structure of its tags and attributes is organized differently.

The original file is the following:

    <?xml version="1.0" encoding="utf-8"?><graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
  <graph edgedefault="undirected">
    <node id="0">
      <data key="d0">rBSTS</data>
      <data key="d1" />
      <data key="d2" />
      <data key="d3">n1</data>
      <data key="d4" />
    </node>
    <node id="1">
      <data key="d1" />
      <data key="d4" />
      <data key="d0">rCAC</data>
      <data key="d2" />
      <data key="d3">n2</data>
    </node>
    <node id="2">
      <data key="d1" />
      <data key="d4" />
      <data key="d0">rCMF</data>
      <data key="d2" />
      <data key="d3">n3</data>
    </node>
     <edge source="0" target="1">
      <data key="d5">0.252829037184</data>
    </edge>
    <edge source="1" target="2">
      <data key="d5">0.205407183132</data>
    </edge>
 </graph>
</graphml>

While the file I would like to obtain is the following (I converted it manually to show the desired result):

<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns">

<graph edgedefault="undirected">

<node id="n1">
    <data key="dn_href"></data>
    <data key="dn_label">rBSTS</data>
    <data key="dn_free">rBSTS</data>
    <data key="dn_intensityvalue">1</data>
</node>
<node id="n2">
    <data key="dn_href"></data>
    <data key="dn_label">rCAC</data>
    <data key="dn_free">rCAC</data>
    <data key="dn_intensityvalue">2</data>
</node>
<node id="n3">
    <data key="dn_href"></data>
    <data key="dn_label">rCMF</data>
    <data key="dn_free">rCMF</data>
    <data key="dn_intensityvalue">3</data>
</node>
<edge id="e1_2" source="n1" target="n2">
    <data key="de_strength">0.252829037184</data>
</edge>
<edge id="e1_3" source="n2" target="n3">
    <data key="de_strength">0.205407183132</data>
</edge>
</graph>
</graphml>

The structural change is not trivial (e.g. the node IDs start from 0 in the original data structure, while they start from n1 in the desired output). Is it possible to perform this conversion with an XSL transformation?


Solution

  • I've made a few assumptions about the conversion logic you want...

    This XSLT stylesheet:

    <xsl:stylesheet version="1.0"
                    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                    xmlns:ns="http://graphml.graphdrawing.org/xmlns"
                    xmlns="http://graphml.graphdrawing.org/xmlns"
                    exclude-result-prefixes="ns #default">

      <!--
        Restructures a GraphML document:
          * each 'node' takes its new 'id' from its 'data' child with key 'd3';
          * each 'node' gets 'dn_href', 'dn_label', 'dn_free' and
            'dn_intensity_value' data children (label and free both carry the
            text of the 'd0' data child);
          * each 'edge' gets an (empty) 'id' attribute, its 'source' and
            'target' are rewritten to the new node ids, and the 'd5' data key
            is renamed to 'de_strength'.
        Everything else is copied through unchanged by the identity template.

        The input elements live in the GraphML namespace, so every match
        pattern and XPath step uses the 'ns' prefix; literal result elements
        use the default namespace, which is bound to the same URI.
      -->

      <xsl:output method="xml"
                  version="1.0"
                  indent="yes"
                  omit-xml-declaration="yes"/>

      <!-- Lookup table: original node 'id' attribute value to 'node' element.
           Lets the edge endpoint mapping resolve a node without rescanning
           the whole document for every 'source'/'target' attribute. -->
      <xsl:key name="node-by-id" match="ns:node" use="@id"/>

      <!-- The identity transform. By itself, copies out the original input. -->
      <xsl:template match="node()|@*">
        <xsl:copy>
          <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
      </xsl:template>

      <xsl:template match="ns:node">
        <!-- Copy out this element (without its original attributes/children). -->
        <xsl:copy>
          <!-- Give it a new 'id' attribute that takes the value of the 'data'
               element for 'key' of 'd3'. -->
          <xsl:attribute name="id">
            <xsl:value-of select="ns:data[@key='d3']"/>
          </xsl:attribute>
          <!-- Output empty 'data' element for 'key' of 'dn_href'. -->
          <data key="dn_href"/>
          <!-- Emit the 'dn_label' / 'dn_free' pair derived from 'd0'. -->
          <xsl:apply-templates select="ns:data[@key='d0']"/>
          <!-- Placeholder only: the origin of this value is not known.
               NOTE: the desired output in the question spells this key
               'dn_intensityvalue' and gives it a numeric value; adjust the
               key and fill in the value once the rule is known. -->
          <data key="dn_intensity_value"/>
        </xsl:copy>
      </xsl:template>

      <xsl:template match="ns:edge">
        <xsl:copy>
          <!-- Add an 'id' attribute. It is left empty here; the rule for
               building the edge id still has to be worked out. -->
          <xsl:attribute name="id"/>
          <!-- Attributes are processed by the endpoint-mapping template
               below; children fall through to the identity transform. -->
          <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
      </xsl:template>

      <xsl:template match="ns:data[@key='d0']">
        <!-- It seems we want two 'data' elements, with different 'key's,
             and the text from the element with 'key' of 'd0'. -->
        <xsl:copy>
          <xsl:attribute name="key">dn_label</xsl:attribute>
          <xsl:apply-templates select="text()"/>
        </xsl:copy>
        <xsl:copy>
          <xsl:attribute name="key">dn_free</xsl:attribute>
          <xsl:apply-templates select="text()"/>
        </xsl:copy>
      </xsl:template>

      <!-- It seems 'd5' keys get changed to 'de_strength'. -->
      <xsl:template match="@key[. = 'd5']">
        <xsl:attribute name="key">de_strength</xsl:attribute>
      </xsl:template>

      <!-- Mapping for edge endpoints: replace the original node id held in
           'source' / 'target' with the 'd3' value of the referenced node.
           One template serves both attributes; name() reproduces whichever
           attribute is being processed. -->
      <xsl:template match="ns:edge/@source | ns:edge/@target">
        <xsl:attribute name="{name()}">
          <xsl:value-of select="key('node-by-id', .)/ns:data[@key='d3']"/>
        </xsl:attribute>
      </xsl:template>

    </xsl:stylesheet>
    

    produces the following output when applied to your example input:

    <graphml xmlns="http://graphml.graphdrawing.org/xmlns"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
      <graph edgedefault="undirected">
          <node id="n1">
             <data key="dn_href"/>
             <data key="dn_label">rBSTS</data>
             <data key="dn_free">rBSTS</data>
             <data key="dn_intensity_value"/>
          </node>
          <node id="n2">
             <data key="dn_href"/>
             <data key="dn_label">rCAC</data>
             <data key="dn_free">rCAC</data>
             <data key="dn_intensity_value"/>
          </node>
          <node id="n3">
             <data key="dn_href"/>
             <data key="dn_label">rCMF</data>
             <data key="dn_free">rCMF</data>
             <data key="dn_intensity_value"/>
          </node>
          <edge id="" source="n1" target="n2">
             <data key="de_strength">0.252829037184</data>
          </edge>
          <edge id="" source="n2" target="n3">
             <data key="de_strength">0.205407183132</data>
          </edge>
      </graph>
    </graphml>
    

    As you can see, it's not perfect (extra namespaces, a few attribute values missing) but hopefully it shows you what you might do.