Search code examples
sqlsql-servert-sqlxml-parsingidentifier

T-SQL: cross apply; too many iterations on line items


I've almost got what I want after shredding up some serious Xml--but after looking at the results, I see that in one section of the parsing, I can't easily resolve this pattern of iterating through all of the line details for each of the subheaders-- so instead of writing out a total of let's say 3 records for all of the line items, I'm writing out three line items for each of the subs--of which let's say I have two. I wind up with a total of 6! :-( I've distilled the pattern as a generic header/subheader/detail relationship model in the code that follows.

    DECLARE @MetroXML xml
    SET @MetroXML =
    '<Header>
       <col1>Conoco</col1>
       <col2>ORD-1111</col2>
       <SubHeaders>
         <SubHeader>
          <col1>Dallas</col1>
          <col2>BOL-2213</col2>
         <Details>
          <Detail>
            <col1>Diesel</col1>
            <col2>7600.00</col2>
          </Detail>
         </Details>
        </SubHeader>
        </SubHeaders>
        <SubHeaders>
         <SubHeader>
          <col1>Fort Worth</col1>
          <col2>BOL-2216</col2>
         <Details>
          <Detail>
            <col1>Ethanol</col1>
            <col2>1852.00</col2>
          </Detail>
          <Detail>
            <col1>Unleaded</col1>
            <col2>900.00</col2>
          </Detail>
         </Details>
       </SubHeader>
      </SubHeaders>
     </Header>';


    INSERT INTO [scratch].GenericHeader
     SELECT T.c.value('col1[1]','varchar(10)') AS 'col1',
            T.c.value('col2[1]','varchar(10)') AS 'col2'
       FROM @MetroXML.nodes('/Header') T(c);


    INSERT [scratch].GenericSubHeader
     (id,col1,col2)
    SELECT 
      h.id,
      n.x.value('col1[1]','varchar(10)') AS 'col1',
      n.x.value('col2[1]','varchar(10)') AS 'col2'
     FROM [scratch].GenericHeader h
      CROSS APPLY @MetroXML.nodes('/Header/SubHeaders/SubHeader') n(x);


     INSERT [scratch].GenericDetail
     (id,subid,col1,col2)
      SELECT 
       s.id,
       s.subid,
       n.x.value('col1[1]','varchar(10)') AS 'col1',
       n.x.value('col2[1]','varchar(10)') AS 'col2'
     FROM [scratch].GenericSubHeader s
      CROSS APPLY @MetroXML.nodes('/Header/SubHeaders/SubHeader/Details/Detail') as n(x);


     select * from [scratch].GenericHeader
      where id = 24;

     select * from [scratch].GenericSubHeader
      where id = 24;

     select * from [scratch].GenericDetail
      where id = 24;

NOTE: id,subid,detid are defined as INT IDENTITY(1,1) Results

What I get:

id|subid|detid|col1     |col2
--------------------------------
24|44   |22   |Diesel   |7600.00
24|44   |23   |Ethanol  |1852.00
24|44   |24   |Unleaded |900.00
24|48   |25   |Diesel   |7600.00
24|48   |26   |Ethanol  |1852.00
24|48   |27   |Unleaded |900.00

What I want to get:

id|subid|detid|col1     |col2
--------------------------------
24|44   |22   |Diesel   |7600.00
24|48   |23   |Ethanol  |1852.00
24|48   |24   |Unleaded |900.00

Solution

  • The reason you get duplicate rows is because you are using cross apply against the entire XML of each row in GenericSubHeader. You have to figure out a way to map the generated ID in GenericHeader and the generated subid in GenericSubHeader to the related part of the XML .

    If you are on SQL Server 2008 or later you can merge with output to create a table variable that holds the generated id and the xml sub nodes that belongs.

    declare @GH table
    (
      id int,
      sub xml
    );
    
    merge scratch.GenericHeader as T
    using 
      (
        select T.c.value('col1[1]','varchar(10)'),
               T.c.value('col2[1]','varchar(10)'),
               T.c.query('SubHeaders')
        from @MetroXML.nodes('/Header') T(c)
      ) as S(col1, col2, sub)
    on 0 = 1
    when not matched then
      insert (col1, col2) values(S.col1, S.col2)
    output inserted.id, S.sub into @GH;  
    
    declare @GSH table
    (
      id int,
      subid int,
      det xml
    );
    
    merge scratch.GenericSubHeader as T
    using
      (
        select h.id,
               n.x.value('col1[1]','varchar(10)'),
               n.x.value('col2[1]','varchar(10)'),
               n.x.query('Details')
        from @GH as h
          cross apply h.sub.nodes('/SubHeaders/SubHeader') n(x)
      ) as S(id, col1, col2, det)
    on 0 = 1
    when not matched then
      insert (id, col1, col2) values (S.id, S.col1, S.col2)
    output inserted.id, inserted.subid, S.det into @GSH;
    
    insert into scratch.GenericDetail(id, subid, col1, col2)
    select s.id,
           s.subid,
           n.x.value('col1[1]','varchar(10)') AS 'col1',
           n.x.value('col2[1]','varchar(10)') AS 'col2'
    from @GSH as s
      cross apply s.det.nodes('/Details/Detail') as n(x);