Search code examples
Tags: xml, xslt, xslt-1.0, xslt-2.0

Split XML file into multiple files using XSLT


I am having trouble splitting an XML file into multiple files using XSLT.

I've tried splitting the file based on the attached XSLT code. I've succeeded in getting the file to split. However, I haven't been able to figure out how to replicate the rest of the file and create one copy for each FlightGroup in the original file.

<?xml version="1.0" encoding="UTF-8"?>
<!-- Question's attempt: writes one result document per FlightGroup element.
     Each document is assembled by hand from selected parts of the input, so it
     is not a copy of the original file (see the notes inside the template). -->
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="2.0">

<!-- Named output definition; xsl:result-document below refers to it via format="xml". -->
<xsl:output method="xml" indent="yes" name="xml"/>


<xsl:template match="/">
<!-- One iteration (and one output file) per FlightGroup anywhere in the input. -->
<xsl:for-each select="//FlightGroup">
<!-- Absolute path: selected from the document root, not relative to the current FlightGroup. -->
<xsl:variable 
    name="filehandle"
    select="//Info/Campaign/CampaignNumber"/>
<!-- Relative path: the FlightGroup-id child of the current FlightGroup. -->
<xsl:variable
    name="FlightGroupId"
    select="FlightGroup-id"
    />
<!-- Target URI has the form file:///c:/Temp/{CampaignNumber}_{FlightGroup-id}.xml -->
<xsl:variable name="filename"
  select="concat('file:///c:/Temp/',$filehandle,'_',$FlightGroupId,'.xml')" />
<xsl:value-of select="$filename" />   <!-- Emits the target file name as text into the principal (main) output. -->
<xsl:result-document href="{$filename}" format="xml">
    <!-- A new Data element is built here instead of copying the input's Data/Info
         structure, so the Info wrapper and Info's other children (ad-ordered-by,
         order-number, Company, and so on) never reach the output. -->
    <Data>
        <filename><xsl:value-of select = "concat($filehandle, '_', $FlightGroupId)"/></filename>
        <xsl:copy-of select = "//OrderCustomers" />
        <!-- Deep copy of Campaign: this includes every FlightGroup, not only the current one. -->
        <xsl:copy-of select = "//Campaign"/>
        <!-- Deep copy of the current FlightGroup, placed after (outside) the copied Campaign. -->
        <xsl:copy-of select = "." />
    </Data>
    </xsl:result-document>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>

I need to be able to generate one file for each flight group with the rest of the file being a duplicate of the original.

For example:

Source File:

<?xml version="1.0" encoding="utf-8"?>
<Data>
  <Info>
    <OrderCustomers>
      <OrderCustomer>
        <IsPrimaryOrderer>true</IsPrimaryOrderer>
        <IsPrimaryPayor>true</IsPrimaryPayor>
        <Customer>
          <account-number>20007658</account-number>
          <accountid>7659</accountid>
          <Name1>DIGITAL TEST ACCOUNT</Name1>
          <Phone>0000000000</Phone>
          <Type-id>2</Type-id>
          <Type>Commercial</Type>
          <Company-id>1</Company-id>
          <Company>Company1</Company>
          <Address>
            <Addr1>123 Main</Addr1>
            <City>Anytown</City>
            <Postal-Code>99999</Postal-Code>
            <Country-id>1</Country-id>
            <Country>USA</Country>
            <State-id>113</State-id>
            <State>TX</State>
          </Address>
          <PrimarySalesRep>Rep1</PrimarySalesRep>
          <Category>Local</Category>
          <PaymentMethod-id>4</PaymentMethod-id>
          <PaymentMethod>Credit Card</PaymentMethod>
        </Customer>
      </OrderCustomer>
    </OrderCustomers>
    <ad-ordered-by>test</ad-ordered-by>
    <publication-code>Y</publication-code>
    <pagination-code>N</pagination-code>
    <ProductionCode>N</ProductionCode>
    <ad-sold-by-id>113</ad-sold-by-id>
    <ad-sold-by>Rep1</ad-sold-by>
    <ad-sold-by-name>National</ad-sold-by-name>
    <ad-order-taker-id>15</ad-order-taker-id>
    <ad-order-taker>jsmith</ad-order-taker>
    <CreateDate>07022019 13:50:31</CreateDate>
    <LastEditDate>07022019 13:50:33</LastEditDate>
    <LastEditUser>jsmith</LastEditUser>
    <order-number>00000994</order-number>
    <price>10.00</price>
    <company-id>1</company-id>
    <company>Company1</company>
    <CreationDate>070220191350</CreationDate>
    <Campaign>
      <CampaignId>205</CampaignId>
      <CampaignNumber>00994-01</CampaignNumber>
      <Name>DIGITAL TEST ACCOUNT01</Name>
      <Type-id>1</Type-id>
      <Type>Banner</Type>
      <Category-id>1</Category-id>
      <Category>Standard</Category>
      <StartDate>07072019</StartDate>
      <EndDate>07312019</EndDate>
      <Quantity>1000</Quantity>
      <Price>10.00</Price>
      <CampaignUnit>
        <Id>7</Id>
        <Name>300x250</Name>
        <Width>300</Width>
        <Height>250</Height>
      </CampaignUnit>
      <CampaignUnit>
        <Id>6</Id>
        <Name>728x90</Name>
        <Width>728</Width>
        <Height>90</Height>
      </CampaignUnit>
      <FlightGroup>
        <Name>Group1</Name>
        <FlightGroup-id>296</FlightGroup-id>
        <CampaignUnit-id>7</CampaignUnit-id>
        <CampaignUnit>300x250</CampaignUnit>
        <Quantity>500</Quantity>
        <StartDate>07072019</StartDate>
        <EndDate>07312019</EndDate>
        <Flight>
          <Flight-id>666</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>3</Page-id>
          <Page>ATF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <InvoicedAlreadyFlag>0</InvoicedAlreadyFlag>
          <PublishedFlag>true</PublishedFlag>
          <Price>2.50</Price>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26901" PublishedFlag="true">07072019</date>
            <date Insertion-id="26902" PublishedFlag="true">07082019</date>
            <date Insertion-id="26903" PublishedFlag="true">07092019</date>
            <date Insertion-id="26904" PublishedFlag="true">07102019</date>
            <date Insertion-id="26905" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
        <Flight>
          <Flight-id>667</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>4</Page-id>
          <Page>BTF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26926" PublishedFlag="true">07072019</date>
            <date Insertion-id="26927" PublishedFlag="true">07082019</date>
            <date Insertion-id="26928" PublishedFlag="true">07092019</date>
            <date Insertion-id="26929" PublishedFlag="true">07102019</date>
            <date Insertion-id="26930" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
      </FlightGroup>
      <FlightGroup>
        <Name>Group2</Name>
        <FlightGroup-id>297</FlightGroup-id>
        <CampaignUnit-id>6</CampaignUnit-id>
        <CampaignUnit>728x90</CampaignUnit>
        <Quantity>500</Quantity>
        <StartDate>07072019</StartDate>
        <EndDate>07312019</EndDate>
        <Flight>
          <Flight-id>668</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>3</Page-id>
          <Page>ATF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26951" PublishedFlag="true">07072019</date>
            <date Insertion-id="26952" PublishedFlag="true">07082019</date>
            <date Insertion-id="26953" PublishedFlag="true">07092019</date>
            <date Insertion-id="26954" PublishedFlag="true">07102019</date>
            <date Insertion-id="26955" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
        <Flight>
          <Flight-id>669</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>4</Page-id>
          <Page>BTF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26976" PublishedFlag="true">07072019</date>
            <date Insertion-id="26977" PublishedFlag="true">07082019</date>
            <date Insertion-id="26978" PublishedFlag="true">07092019</date>
            <date Insertion-id="26979" PublishedFlag="true">07102019</date>
            <date Insertion-id="26980" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
      </FlightGroup>
      <FrequencyCap />
      <AdServingId>3</AdServingId>
      <AdServing>DFP</AdServing>
      <CampaignViewTypeId>2</CampaignViewTypeId>
      <CampaignViewType>Impressions</CampaignViewType>
      <CreateDate>07022019 13:50:31</CreateDate>
      <CreateUser>Rep1</CreateUser>
      <LastEditDate>07022019 13:50:33</LastEditDate>
      <LastEditUser>Rep1</LastEditUser>
    </Campaign>
    <Company>
      <ID>1</ID>
      <COMPANYCODE>Company1</COMPANYCODE>
    </Company>
  </Info>
</Data>

File #1 - 00994-01_296.xml

<?xml version="1.0" encoding="utf-8"?>
<Data>
  <Info>
    <OrderCustomers>
      <OrderCustomer>
        <IsPrimaryOrderer>true</IsPrimaryOrderer>
        <IsPrimaryPayor>true</IsPrimaryPayor>
        <Customer>
          <account-number>20007658</account-number>
          <accountid>7659</accountid>
          <Name1>DIGITAL TEST ACCOUNT</Name1>
          <Phone>0000000000</Phone>
          <Type-id>2</Type-id>
          <Type>Commercial</Type>
          <Company-id>1</Company-id>
          <Company>Company1</Company>
          <Address>
            <Addr1>123 Main</Addr1>
            <City>Anytown</City>
            <Postal-Code>99999</Postal-Code>
            <Country-id>1</Country-id>
            <Country>USA</Country>
            <State-id>113</State-id>
            <State>TX</State>
          </Address>
          <PrimarySalesRep>Rep1</PrimarySalesRep>
          <Category>Local</Category>
          <PaymentMethod-id>4</PaymentMethod-id>
          <PaymentMethod>Credit Card</PaymentMethod>
        </Customer>
      </OrderCustomer>
    </OrderCustomers>
    <ad-ordered-by>test</ad-ordered-by>
    <publication-code>Y</publication-code>
    <pagination-code>N</pagination-code>
    <ProductionCode>N</ProductionCode>
    <ad-sold-by-id>113</ad-sold-by-id>
    <ad-sold-by>Rep1</ad-sold-by>
    <ad-sold-by-name>National</ad-sold-by-name>
    <ad-order-taker-id>15</ad-order-taker-id>
    <ad-order-taker>jsmith</ad-order-taker>
    <CreateDate>07022019 13:50:31</CreateDate>
    <LastEditDate>07022019 13:50:33</LastEditDate>
    <LastEditUser>jsmith</LastEditUser>
    <order-number>00000994</order-number>
    <price>10.00</price>
    <company-id>1</company-id>
    <company>Company1</company>
    <CreationDate>070220191350</CreationDate>
    <Campaign>
      <CampaignId>205</CampaignId>
      <CampaignNumber>00994-01</CampaignNumber>
      <Name>DIGITAL TEST ACCOUNT01</Name>
      <Type-id>1</Type-id>
      <Type>Banner</Type>
      <Category-id>1</Category-id>
      <Category>Standard</Category>
      <StartDate>07072019</StartDate>
      <EndDate>07312019</EndDate>
      <Quantity>1000</Quantity>
      <Price>10.00</Price>
      <CampaignUnit>
        <Id>7</Id>
        <Name>300x250</Name>
        <Width>300</Width>
        <Height>250</Height>
      </CampaignUnit>
      <CampaignUnit>
        <Id>6</Id>
        <Name>728x90</Name>
        <Width>728</Width>
        <Height>90</Height>
      </CampaignUnit>
      <FlightGroup>
        <Name>Group1</Name>
        <FlightGroup-id>296</FlightGroup-id>
        <CampaignUnit-id>7</CampaignUnit-id>
        <CampaignUnit>300x250</CampaignUnit>
        <Quantity>500</Quantity>
        <StartDate>07072019</StartDate>
        <EndDate>07312019</EndDate>
        <Flight>
          <Flight-id>666</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>3</Page-id>
          <Page>ATF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <InvoicedAlreadyFlag>0</InvoicedAlreadyFlag>
          <PublishedFlag>true</PublishedFlag>
          <Price>2.50</Price>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26901" PublishedFlag="true">07072019</date>
            <date Insertion-id="26902" PublishedFlag="true">07082019</date>
            <date Insertion-id="26903" PublishedFlag="true">07092019</date>
            <date Insertion-id="26904" PublishedFlag="true">07102019</date>
            <date Insertion-id="26905" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
        <Flight>
          <Flight-id>667</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>4</Page-id>
          <Page>BTF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26926" PublishedFlag="true">07072019</date>
            <date Insertion-id="26927" PublishedFlag="true">07082019</date>
            <date Insertion-id="26928" PublishedFlag="true">07092019</date>
            <date Insertion-id="26929" PublishedFlag="true">07102019</date>
            <date Insertion-id="26930" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
      </FlightGroup>
      <FrequencyCap />
      <AdServingId>3</AdServingId>
      <AdServing>DFP</AdServing>
      <CampaignViewTypeId>2</CampaignViewTypeId>
      <CampaignViewType>Impressions</CampaignViewType>
      <CreateDate>07022019 13:50:31</CreateDate>
      <CreateUser>Rep1</CreateUser>
      <LastEditDate>07022019 13:50:33</LastEditDate>
      <LastEditUser>Rep1</LastEditUser>
    </Campaign>
    <Company>
      <ID>1</ID>
      <COMPANYCODE>Company1</COMPANYCODE>
    </Company>
  </Info>
</Data>

File #2 - 00994-01_297.xml

<?xml version="1.0" encoding="utf-8"?>
<Data>
  <Info>
    <OrderCustomers>
      <OrderCustomer>
        <IsPrimaryOrderer>true</IsPrimaryOrderer>
        <IsPrimaryPayor>true</IsPrimaryPayor>
        <Customer>
          <account-number>20007658</account-number>
          <accountid>7659</accountid>
          <Name1>DIGITAL TEST ACCOUNT</Name1>
          <Phone>0000000000</Phone>
          <Type-id>2</Type-id>
          <Type>Commercial</Type>
          <Company-id>1</Company-id>
          <Company>Company1</Company>
          <Address>
            <Addr1>123 Main</Addr1>
            <City>Anytown</City>
            <Postal-Code>99999</Postal-Code>
            <Country-id>1</Country-id>
            <Country>USA</Country>
            <State-id>113</State-id>
            <State>TX</State>
          </Address>
          <PrimarySalesRep>Rep1</PrimarySalesRep>
          <Category>Local</Category>
          <PaymentMethod-id>4</PaymentMethod-id>
          <PaymentMethod>Credit Card</PaymentMethod>
        </Customer>
      </OrderCustomer>
    </OrderCustomers>
    <ad-ordered-by>test</ad-ordered-by>
    <publication-code>Y</publication-code>
    <pagination-code>N</pagination-code>
    <ProductionCode>N</ProductionCode>
    <ad-sold-by-id>113</ad-sold-by-id>
    <ad-sold-by>Rep1</ad-sold-by>
    <ad-sold-by-name>National</ad-sold-by-name>
    <ad-order-taker-id>15</ad-order-taker-id>
    <ad-order-taker>jsmith</ad-order-taker>
    <CreateDate>07022019 13:50:31</CreateDate>
    <LastEditDate>07022019 13:50:33</LastEditDate>
    <LastEditUser>jsmith</LastEditUser>
    <order-number>00000994</order-number>
    <price>10.00</price>
    <company-id>1</company-id>
    <company>Company1</company>
    <CreationDate>070220191350</CreationDate>
    <Campaign>
      <CampaignId>205</CampaignId>
      <CampaignNumber>00994-01</CampaignNumber>
      <Name>DIGITAL TEST ACCOUNT01</Name>
      <Type-id>1</Type-id>
      <Type>Banner</Type>
      <Category-id>1</Category-id>
      <Category>Standard</Category>
      <StartDate>07072019</StartDate>
      <EndDate>07312019</EndDate>
      <Quantity>1000</Quantity>
      <Price>10.00</Price>
      <CampaignUnit>
        <Id>7</Id>
        <Name>300x250</Name>
        <Width>300</Width>
        <Height>250</Height>
      </CampaignUnit>
      <CampaignUnit>
        <Id>6</Id>
        <Name>728x90</Name>
        <Width>728</Width>
        <Height>90</Height>
      </CampaignUnit>
      <FlightGroup>
        <Name>Group2</Name>
        <FlightGroup-id>297</FlightGroup-id>
        <CampaignUnit-id>6</CampaignUnit-id>
        <CampaignUnit>728x90</CampaignUnit>
        <Quantity>500</Quantity>
        <StartDate>07072019</StartDate>
        <EndDate>07312019</EndDate>
        <Flight>
          <Flight-id>668</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>3</Page-id>
          <Page>ATF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26951" PublishedFlag="true">07072019</date>
            <date Insertion-id="26952" PublishedFlag="true">07082019</date>
            <date Insertion-id="26953" PublishedFlag="true">07092019</date>
            <date Insertion-id="26954" PublishedFlag="true">07102019</date>
            <date Insertion-id="26955" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
        <Flight>
          <Flight-id>669</Flight-id>
          <Site-id>2</Site-id>
          <NetworkId>3</NetworkId>
          <Network>Digital</Network>
          <Site>anysite.com</Site>
          <Section-id>20</Section-id>
          <Section>Business</Section>
          <Page-id>4</Page-id>
          <Page>BTF</Page>
          <SiteURL>www.anysite.com</SiteURL>
          <Quantity>250</Quantity>
          <dates>
            <date Insertion-id="26976" PublishedFlag="true">07072019</date>
            <date Insertion-id="26977" PublishedFlag="true">07082019</date>
            <date Insertion-id="26978" PublishedFlag="true">07092019</date>
            <date Insertion-id="26979" PublishedFlag="true">07102019</date>
            <date Insertion-id="26980" PublishedFlag="true">07112019</date>
          </dates>
        </Flight>
      </FlightGroup>
      <FrequencyCap />
      <AdServingId>3</AdServingId>
      <AdServing>DFP</AdServing>
      <CampaignViewTypeId>2</CampaignViewTypeId>
      <CampaignViewType>Impressions</CampaignViewType>
      <CreateDate>07022019 13:50:31</CreateDate>
      <CreateUser>Rep1</CreateUser>
      <LastEditDate>07022019 13:50:33</LastEditDate>
      <LastEditUser>Rep1</LastEditUser>
    </Campaign>
    <Company>
      <ID>1</ID>
      <COMPANYCODE>Company1</COMPANYCODE>
    </Company>
  </Info>
</Data>

Solution

  • "I need to be able to generate one file for each flight group with the rest of the file being a duplicate of the original.": Then using an approach like

    <!-- Default mode: each FlightGroup gets its own result document containing
         a copy of the whole input in which only that FlightGroup is kept. -->
    <xsl:template match="FlightGroup">
      <!-- File name is CampaignNumber_FlightGroup-id.xml (e.g. 00994-01_296.xml).
           The campaign number is read from the enclosing Campaign element: the
           document element is Data, so an absolute path starting at /Info would
           select nothing and drop the campaign number from the file name. -->
      <xsl:result-document href="{ancestor::Campaign/CampaignNumber}_{FlightGroup-id}.xml">
          <!-- Re-process the document from its root in mode "split"; the tunnel
               parameter carries the FlightGroup to keep down to the template
               that needs it without every intermediate template declaring it. -->
          <xsl:apply-templates select="/" mode="split">
             <xsl:with-param name="this-flight-group" select="." tunnel="yes"/>
          </xsl:apply-templates>
      </xsl:result-document>
    </xsl:template>
    
    <!-- Mode "split": identity transformation, copies every node unchanged. -->
    <xsl:template match="@* | node()" mode="split">
      <xsl:copy>
        <xsl:apply-templates select="@* | node()" mode="#current"/>
      </xsl:copy>
    </xsl:template>
    
    <!-- Mode "split": keep only the FlightGroup this result document was created
         for; every other FlightGroup produces no output. -->
    <xsl:template match="FlightGroup" mode="split">
      <xsl:param name="this-flight-group" tunnel="yes"/>
      <!-- "is" compares node identity, not content, so a FlightGroup with equal
           values but a different position is still dropped. -->
      <xsl:if test=". is $this-flight-group">
        <!-- Hand over to the identity template above to copy this FlightGroup. -->
        <xsl:next-match/>
      </xsl:if>
    </xsl:template>
    

    should do. It sets up a template for FlightGroup elements in the unnamed default mode that creates the result document and then pushes the whole document through the mode named split, passing the current FlightGroup element along in a tunnel parameter. The split mode uses the identity transformation as its base processing. For FlightGroup elements it has a template that compares the tunnel parameter with the currently processed FlightGroup by node identity, using the is operator: if they are the same node, it delegates processing to the identity transformation with xsl:next-match; otherwise it produces no output for that FlightGroup.

    Using XSLT 3 (as supported by Saxon 9.8 and later (all editions) and Altova XML 2017 R3 and later) and its new features of shadow attributes and static parameters, one could even try to write a generic solution that takes the "split" pattern and the "file name" expression as static parameters. With that idea, the above would be implemented as

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Generic "one result document per matched element" splitter (XSLT 3.0).
         Both the match pattern and the file-name template are static parameters
         that are injected into the stylesheet through shadow attributes
         (_match, _href) when the stylesheet is compiled. -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        exclude-result-prefixes="#all">
    
        <!-- Pattern (as a string) identifying the elements to split on. -->
        <xsl:param name="split-pattern" as="xs:string" static="yes" select="'FlightGroup'"/>
    
        <!-- Attribute value template (as a string) for the result file name,
             evaluated with the split element as context item. The default yields
             CampaignNumber_FlightGroup-id.xml (e.g. 00994-01_296.xml); the campaign
             number is taken from the enclosing Campaign so that a document with
             several campaigns still gets one number per file name. -->
        <xsl:param name="file-name-expression" as="xs:string" static="yes" select="'{ancestor::Campaign/CampaignNumber}_{FlightGroup-id}.xml'"/>
    
        <!-- Default mode: each split element gets its own result document holding
             a copy of the whole input in which only that split element is kept. -->
        <xsl:template _match="{$split-pattern}">
            <xsl:result-document _href="{$file-name-expression}">
                <!-- Re-process from the document root in mode "split"; the tunnel
                     parameter carries the element to keep. -->
                <xsl:apply-templates select="/" mode="split">
                    <xsl:with-param name="this-split-element" select="." tunnel="yes"/>
                </xsl:apply-templates>
            </xsl:result-document>
        </xsl:template>
    
        <!-- Mode "split" copies unmatched nodes unchanged (identity transformation). -->
        <xsl:mode name="split" on-no-match="shallow-copy"/>
    
        <!-- Mode "split": keep only the split element this result document was
             created for; all other split elements produce no output. -->
        <xsl:template _match="{$split-pattern}" mode="split">
            <xsl:param name="this-split-element" tunnel="yes"/>
            <!-- "is" compares node identity, not content. -->
            <xsl:if test=". is $this-split-element">
                <!-- Falls through to the mode's shallow-copy behaviour. -->
                <xsl:next-match/>
            </xsl:if>
        </xsl:template>
    
    </xsl:stylesheet>
    

    and anyone wanting to split on a certain element (e.g. foo) with a certain file name expression (e.g. the static string split- followed by the value of the child element id) could simply adapt the static parameters, e.g. inline as done below

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Splits the input into one result document per element matching the
         static parameter split-pattern; each result document is a copy of the
         whole input that retains only that one matching element. -->
    <xsl:stylesheet
        version="3.0"
        xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        exclude-result-prefixes="#all">

        <!-- Static configuration: which elements to split on ... -->
        <xsl:param name="split-pattern" static="yes" as="xs:string" select="'foo'"/>

        <!-- ... and the attribute value template that names each result file. -->
        <xsl:param name="file-name-expression" static="yes" as="xs:string" select="'split-{id}.xml'"/>

        <!-- In mode "split", nodes without a matching template are shallow-copied. -->
        <xsl:mode name="split" on-no-match="shallow-copy"/>

        <!-- Inside a result document: let only the split element that triggered
             this document through; any other split element yields nothing. -->
        <xsl:template mode="split" _match="{$split-pattern}">
            <xsl:param name="split-target" tunnel="yes"/>
            <!-- Node-identity comparison, not a value comparison. -->
            <xsl:if test="$split-target is .">
                <xsl:next-match/>
            </xsl:if>
        </xsl:template>

        <!-- Unnamed mode: open one result document per split element and run the
             entire input document through mode "split", tunnelling the current
             element down as the one to keep. -->
        <xsl:template _match="{$split-pattern}">
            <xsl:result-document _href="{$file-name-expression}">
                <xsl:apply-templates mode="split" select="/">
                    <xsl:with-param name="split-target" tunnel="yes" select="."/>
                </xsl:apply-templates>
            </xsl:result-document>
        </xsl:template>

    </xsl:stylesheet>
    

    but of course XSLT 3 processors or IDEs allow you to set static parameters without even touching/changing the XSLT code itself.