Search code examples
python html lxml kml google-earth

Editing the html content of <description> of a KML using lxml


I want to replace the HTML inside the <description> tag of a KML file with new, formatted HTML.

My kml has this structure:

<html>
 <body>
  <kml xmlns="http://www.opengis.net/kml/2.2" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:kml="http://www.opengis.net/kml/2.2">
   <document id="WATER_MAINLINE_trim" xsi:schemalocation="http://www.opengis.net/kml/2.2 http://schemas.opengis.net/kml/2.2.0/ogckml22.xsd http://www.google.com/kml/ext/2.2 http://code.google.com/apis/kml/schema/kml22gx.xsd">
    <name>
     WATER_MAINLINE_trim
    </name>
    <open>
     1
    </open>
    <snippet maxlines="0">
    </snippet>
    <style id="LineStyle00">
     <LabelStyle>
            <color>00000000</color>
            <scale>0</scale>
        </LabelStyle>
        <LineStyle>
            <color>ff240087</color>
        </LineStyle>
        <PolyStyle>
            <color>00000000</color>
            <outline>0</outline>
        </PolyStyle>
    </style>
    <folder id="FeatureLayer0">
     <name>
      WATER_MAINLINE_trim
     </name>
     <open>
      1
     </open>
     <snippet maxlines="0">
     </snippet>
     <placemark id="ID_00000">
      <name>
       0100026491
      </name>
      <snippet maxlines="0">
      </snippet>
      <description>
       <meta content="text/html" http-equiv="Content-Type" />
       <meta content="text/html; charset=utf-8" http-equiv="content-type" />
       <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px">
        <tr style="text-align:center;font-weight:bold;background:#9CBCE2">
         <td>
          0100026491
         </td>
        </tr>
        <tr>
         <td>
          <table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px">
           <tr>
            <td>
             FID
            </td>
            <td>
             0
            </td>
           </tr>
           <tr bgcolor="#D4E4F3">
            <td>
             PRIKEY
            </td>
            <td>
             0100026491
            </td>
           </tr>
           <tr>
            <td>
             YEAR_INST
            </td>
            <td>
             2001
            </td>
           </tr>
           <tr bgcolor="#D4E4F3">
            <td>
             PIPE_CLASS
            </td>
            <td>
             PRIMARY
            </td>
           </tr>
           <tr>
            <td>
             DIAMETER
            </td>
            <td>
             1500
            </td>
           </tr>
           <tr bgcolor="#D4E4F3">
            <td>
             MATERIAL
            </td>
            <td>
             SP
            </td>
           </tr>
           <tr>
            <td>
             STATUS
            </td>
            <td>
             ACTIVE
            </td>
           </tr>
           <tr bgcolor="#D4E4F3">
            <td>
             BA
            </td>
            <td>
             FCOM
            </td>
           </tr>
           <tr>
            <td>
             SUBCLASS
            </td>
            <td>
             WATER MAINLINE
            </td>
           </tr>
          </table>
         </td>
        </tr>
       </table>
      </description>
     </placemark>
    </folder>
   </document>
  </kml>
 </body>
</html>

And I have this new html:

newhtml="""<![CDATA[ \n<!------------TITLE SUBCLASS---------------->\n  <tr>\n    <td colspan="2" align="center">\n      <b><font color=\'#090259\' size=\'6\' style = \'bold\'>LA MESA BALARA</font><b>\n    </td>/n  </tr>\n<!------------IMAGE---------------->\n  <tr>\n    <td colspan="2" align="center">\n      <img src= http://static.rappler.com/images/640-lamesadam-20120728.jpg, width=500, height = 223, alt="picture" />\n    </td>\n  </tr>\n<!------------PRIKEY---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>PRIKEY</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>0100026491</p>\n    </td>\n<!------------YEAR INSTALLED---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Year Installed</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>2001</p>\n    </td>\n<!------------PIPE CLASS---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Pipe Class</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>PRIMARY</p>\n    </td>\n<!------------DIAMETER---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Diameter (mm)</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>1500.000000</p>\n    </td>\n<!------------MATERIAL---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Material</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>SP</p>\n    </td>\n<!------------STATUS---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Status</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>ACTIVE</p>\n    </td>\n<!------------BUSINESS 
ADDRESS---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Business Address</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>Fairview-Commonwealth</p>\n    </td>]]>"""

How do I properly replace it inside a parsed KML using lxml so that the result is still valid KML? By 'valid', I mean KML that can be loaded in Google Earth. I have tried doing the replacement using BeautifulSoup, but my output file produces an error when loaded in Google Earth: "Unexpected element "html"". So I want to use lxml for this instead. Any help will be appreciated. Thank you!

I have this sample KML, which contains 5 LineString placemarks.

trim.kml = https://sites.google.com/site/kmlhostingmwss/trim.kml


Solution

  • Since a KML file is an XML file, consider XSLT, the transformation language designed specifically for modifying XML documents. Python's lxml can run XSLT 1.0 scripts.

    Specifically, the XSLT below is built dynamically and parsed from a string. It first runs the Identity Transform to copy the document as is, and then replaces the content of every <description> element with the newhtml variable.

    import lxml.etree as ET
    
    # READ IN KML FILE
    dom = ET.parse('trim.kml')
    
    newhtml = """<![CDATA[\n<!------------TITLE SUBCLASS---------------->\n  <tr>\n    <td colspan="2" align="center">\n      <b><font color=\'#090259\' size=\'6\' style = \'bold\'>LA MESA BALARA</font><b>\n    </td>/n  </tr>\n<!------------IMAGE---------------->\n  <tr>\n    <td colspan="2" align="center">\n      <img src= http://static.rappler.com/images/640-lamesadam-20120728.jpg, width=500, height = 223, alt="picture" />\n    </td>\n  </tr>\n<!------------PRIKEY---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>PRIKEY</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>0100026491</p>\n    </td>\n<!------------YEAR INSTALLED---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Year Installed</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>2001</p>\n    </td>\n<!------------PIPE CLASS---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Pipe Class</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>PRIMARY</p>\n    </td>\n<!------------DIAMETER---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Diameter (mm)</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>1500.000000</p>\n    </td>\n<!------------MATERIAL---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Material</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>SP</p>\n    </td>\n<!------------STATUS---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Status</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>ACTIVE</p>\n    
</td>\n<!------------BUSINESS ADDRESS---------------->\n  <tr>\n    <td bgcolor = \'#090259\', align="center" >\n      <p><font color = \'FFFFFF\', size =\'4\'>Business Address</p>\n    </td>\n \n    <td bgcolor = \'#d8d8ff\' align="center">\n      <p>Fairview-Commonwealth</p>\n    </td>]]>"""
    
    # PARSE XSL FROM STRING
    xslstr = '''<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
    xmlns:ogc="http://www.opengis.net/ogc" xmlns:wfs="http://www.opengis.net/wfs">
    <xsl:output version="1.0" encoding="UTF-8" indent="yes" />
    <xsl:strip-space elements="*"/>
    
      <xsl:template match="@*|node()">
        <xsl:copy>
          <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
      </xsl:template>
    
      <xsl:template match="description">
        <xsl:copy>
          <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
          <xsl:text disable-output-escaping="yes">{}</xsl:text>
          <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
        </xsl:copy>  
      </xsl:template>
    
    </xsl:transform>'''.format(newhtml)
    
    xslt = ET.fromstring(xslstr)
    
    # TRANSFORM SOURCE TO NEW TREE
    transform = ET.XSLT(xslt)
    newdom = transform(dom)
    
    # OUTPUT TO FILE
    tree_out = ET.tostring(newdom, encoding='UTF-8', pretty_print=True, xml_declaration=True)
    
    xmlfile = open('newTrim.kml','wb')
    xmlfile.write(tree_out)
    xmlfile.close()