Search code examples
python xml pretty-print elementtree minidom

Node.toprettyxml() adds newlines to DOCTYPE in Python


When using prettify my DOCTYPE is broken into three lines. How can I keep it on one line?

The "broken" output:

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE smil
  PUBLIC '-//W3C//DTD SMIL 2.0//EN'
  'http://www.w3.org/2001/SMIL20/SMIL20.dtd'>
<smil xmlns="http://www.w3.org/2001/SMIL20/Language">
  <head>
    <meta base="rtmp://cp23636.edgefcs.net/ondemand"/>
  </head>
  <body>
    <switch>
      <video src="mp4:soundcheck/1/clay_aiken/02_sc_ca_sorry_256.mp4" system-bitrate="336000"/>
      <video src="mp4:soundcheck/1/clay_aiken/02_sc_ca_sorry_512.mp4" system-bitrate="592000"/>
      <video src="mp4:soundcheck/1/clay_aiken/02_sc_ca_sorry_768.mp4" system-bitrate="848000"/>
      <video src="mp4:soundcheck/1/clay_aiken/02_sc_ca_sorry_1128.mp4" system-bitrate="1208000"/>
    </switch>
  </body>
</smil>

The script:

import csv
import sys
import os.path

from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement, Comment, tostring

from xml.dom import minidom

def prettify(doctype, elem):
    """Return *elem*, preceded by *doctype*, as indented UTF-8 XML.

    The element is serialized as UTF-8, the doctype declaration is put in
    front of it, and the combined document is re-parsed with minidom so it
    can be pretty-printed with a two-space indent.
    """
    document = minidom.parseString(
        doctype + ElementTree.tostring(elem, 'utf-8'))
    return document.toprettyxml(indent="  ", encoding='utf-8')

# Declaration placed in front of every serialized <smil> element.
doctype = '<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">'

# (file-name suffix, system-bitrate) pairs; one <video> element is emitted
# per pair.
video_data = ((256, 336000),
              (512, 592000),
              (768, 848000),
              (1128, 1208000))


# One .smil file is written for every row of the CSV file named by the first
# command-line argument.  Each row is read for the "year", "id" and
# "file_root_name" columns.
with open(sys.argv[1], 'rU') as f:
    reader = csv.DictReader(f)
    for row in reader:
        root = Element('smil')
        root.set('xmlns', 'http://www.w3.org/2001/SMIL20/Language')
        head = SubElement(root, 'head')
        # "base" is passed as a real attribute instead of being smuggled
        # into the tag name; the serialized element is the same.
        SubElement(head, 'meta',
                   {'base': 'rtmp://cp23636.edgefcs.net/ondemand'})
        body = SubElement(root, 'body')

        switch_tag = SubElement(body, 'switch')

        for suffix, bitrate in video_data:
            attrs = {'src': ("mp4:soundcheck/{year}/{id}/{file_root_name}_{suffix}.mp4"
                             .format(suffix=str(suffix), **row)),
                     'system-bitrate': str(bitrate),
                     }
            SubElement(switch_tag, 'video', attrs)

        # Target is "<year>-<id>/<file_root_name>.smil".  The directory is
        # not created here, so it has to exist already.
        path = row['year'] + '-' + row['id']
        file_name = row['file_root_name'] + '.smil'
        full_path = os.path.join(path, file_name)
        # The with-block closes (and flushes) each output file before the
        # next row is processed.
        with open(full_path, 'w') as output:
            output.write(prettify(doctype, root))

Solution

  • Having looked over your current script and the other questions you've asked on this subject, I think you could make your life a lot simpler by building your smil files using string manipulation.

    Almost all the xml in your files is static. The only data you need to worry about processing correctly is the attribute values for the video tag. And for that, there is a handy function in the standard library that does exactly what you want: xml.sax.saxutils.quoteattr.

    So, with those points in mind, here is a script that should be a lot easier to work with:

    import sys, os, csv
    from xml.sax.saxutils import quoteattr
    
    # Static start of every SMIL file: XML declaration, single-line DOCTYPE
    # and the opening tags up to <switch>.
    smil_header = '''\
    <?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
    <smil xmlns="http://www.w3.org/2001/SMIL20/Language">
      <head>
        <meta base="rtmp://cp23636.edgefcs.net/ondemand"/>
      </head>
      <body>
        <switch>
    '''
    # Template for one <video> element; both slots receive values that are
    # already quoted by quoteattr, hence no quotes around %s.
    smil_video = '''\
          <video src=%s system-bitrate=%s/>
    '''
    # Closing tags matching smil_header.
    smil_footer = '''\
        </switch>
      </body>
    </smil>
    '''

    # Filled from the CSV row plus the per-rendition 'suffix' key.
    src_format = 'mp4:soundcheck/%(year)s/%(id)s/%(file_root_name)s_%(suffix)s.mp4'

    # (file-name suffix, system-bitrate) pairs, one <video> element each.
    video_data = (
        ('256', '336000'), ('512', '592000'),
        ('768', '848000'), ('1128', '1208000'),
        )

    # Output root: the current directory unless a second argument is given.
    root = os.getcwd()
    if len(sys.argv) > 2:
        root = sys.argv[2]

    with open(sys.argv[1], 'rU') as csv_file:

        for row in csv.DictReader(csv_file):
            smil = [smil_header]
            for suffix, bitrate in video_data:
                row['suffix'] = suffix
                # Interpolate first, then quote: quoting the template
                # instead would leave characters such as & or " coming
                # from the CSV unescaped in the attribute value.
                smil.append(smil_video % (
                    quoteattr(src_format % row), quoteattr(bitrate)
                    ))
            smil.append(smil_footer)

            directory = os.path.join(root, '%(year)s-%(id)s' % row)
            try:
                os.makedirs(directory)
            except OSError:
                # An already existing directory is fine; anything else
                # (permissions, a plain file in the way, ...) is a real error.
                if not os.path.isdir(directory):
                    raise
            path = os.path.join(directory, '%(file_root_name)s.smil' % row)
            # Single-argument parenthesised form prints the same text as the
            # Python 2 print statement.
            print(':: writing file: %s' % path)
            # A distinct name keeps the CSV handle from being shadowed while
            # it is still being iterated.
            with open(path, 'wb') as smil_file:
                smil_file.write(''.join(smil))