Search code examples
sql-serverxmlt-sqlsql-server-2012xquery

How to get nodes path in XML by value?


I have XML with multiple node structure and would like to get nodes path in XML by searching value "12345" using SQL Server 2012.

Sample XML :

<?xml version="1.0" encoding="UTF-8"?>
<Parent>
  <DataExtensions>
   <DataItem key="Key1" type="String">
     <value>12345</value>
   </DataItem>
    <DataItem key="Key2" type="String">
     <value>6789</value>
   </DataItem>
 </DataExtensions>
 <TestKey1>12345</TestKey1>
 <Answers>
    <AnswersList>
        <Entry key="Key2" type="Answer">
          <value ID="Key1ID" Text="12345" />
       </Entry>
       <Entry key="Key3" type="Answer">
          <value ID="Key3ID" Text="6789" />
       </Entry>
     </AnswersList>
  </Answers>
  <TestKey2>12345</TestKey2>
  <TestKey3>67890</TestKey3>
  <TestKey4>12345</TestKey3>
</Parent> 

I am able to get values by writing for each select statement, but I would like to get node path where key value or value is equal to 12345 in the whole XML.

Desired output in a table would be:

(/*/TestKey1/text())
(/*/TestKey2/text())
(/*/TestKey4/text())
(/*/DataExtensions/DataItem[@key=("Key1")]/value())
 (/*/Answers/AnswersList/Entry[@key=("Key2")]/value/text())

Please help.


Solution

  • You can try the following solution. It does many powerful things, including XML visual tree representation. Your answer is in the [XPath] column of the output.

    T-SQL

    DECLARE @xml XML = 
    N'<Parent>
        <DataExtensions>
            <DataItem key="Key1" type="String">
                <value>12345</value>
            </DataItem>
            <DataItem key="Key2" type="String">
                <value>6789</value>
            </DataItem>
        </DataExtensions>
        <TestKey1>12345</TestKey1>
        <Answers>
            <AnswersList>
                <Entry key="Key2" type="Answer">
                    <value ID="Key1ID" Text="6789"/>
                </Entry>
                <Entry key="Key3" type="Answer">
                    <value ID="Key3ID" Text="12345"/>
                </Entry>
            </AnswersList>
        </Answers>
        <TestKey2>12345</TestKey2>
        <TestKey3>67890</TestKey3>
        <TestKey4>12345</TestKey4>
    </Parent>';
    
    -- Solution # 2.
    -- a thing of beauty.
    WITH cte AS (  
    SELECT 1 AS lvl,  
            x.value('local-name(.)','NVARCHAR(MAX)') AS Name,  
            CAST(NULL AS NVARCHAR(MAX)) AS ParentName, 
            CAST(1 AS INT) AS ParentPosition, 
            CAST(N'Element' AS NVARCHAR(20)) AS NodeType,  
            '/' + x.value('local-name(.)','NVARCHAR(MAX)') AS FullPath,  
            '/' + x.value('local-name(.)','NVARCHAR(MAX)')  
            + N'[' 
            + CAST(ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS NVARCHAR)  
            + N']' AS XPath,  
            ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS Position, 
            x.value('local-name(.)','NVARCHAR(MAX)') AS Tree,  
            x.value('text()[1]','NVARCHAR(MAX)') AS Value,  
            x.query('.') AS this,         
            x.query('*') AS t,  
            CAST(CAST(1 AS VARBINARY(4)) AS VARBINARY(MAX)) AS Sort,  
            CAST(1 AS INT) AS ID  
    FROM @xml.nodes('/*') a(x)  
    UNION ALL 
    SELECT p.lvl + 1 AS lvl,  
            c.value('local-name(.)','NVARCHAR(MAX)') AS Name,  
            CAST(p.Name AS NVARCHAR(MAX)) AS ParentName, 
        CAST(p.Position AS INT) AS ParentPosition, 
            CAST(N'Element' AS NVARCHAR(20)) AS NodeType,  
            CAST(p.FullPath + N'/' + c.value('local-name(.)','NVARCHAR(MAX)') AS NVARCHAR(MAX)) AS FullPath,
    
            CAST(p.XPath + N'/'+ c.value('local-name(.)','NVARCHAR(MAX)')
            + N'['
            + CAST(ROW_NUMBER() OVER(PARTITION BY c.value('local-name(.)','NVARCHAR(MAX)') ORDER BY (SELECT 1)) AS NVARCHAR)
            + N']' AS NVARCHAR(MAX)) AS XPath,  
    
            ROW_NUMBER() OVER(PARTITION BY c.value('local-name(.)','NVARCHAR(MAX)')
            ORDER BY (SELECT 1)) AS Position, 
            CAST( SPACE(2 * p.lvl - 1) + N'|' + REPLICATE(N'-', 1) + c.value('local-name(.)','NVARCHAR(MAX)') AS NVARCHAR(MAX)) AS Tree,  
            CAST( c.value('text()[1]','NVARCHAR(MAX)') AS NVARCHAR(MAX) ) AS Value, c.query('.') AS this,  
            c.query('*') AS t,  
            CAST(p.Sort + CAST( (lvl + 1) * 1024 + (ROW_NUMBER() OVER(ORDER BY (SELECT 1)) * 2) AS VARBINARY(4)) AS VARBINARY(MAX) ) AS Sort,  
            CAST((lvl + 1) * 1024 + (ROW_NUMBER() OVER(ORDER BY (SELECT 1)) * 2) AS INT)  
    FROM cte p  
    CROSS APPLY p.t.nodes('*') b(c))
    , cte2 AS (  
    SELECT lvl AS Depth,  
            Name AS NodeName,  
            ParentName, 
            ParentPosition, 
            NodeType,  
            FullPath,  
            XPath,  
            Position, 
            Tree AS TreeView,  
            Value,  
            this AS XMLData,  
            Sort, ID  
            FROM cte  
    UNION ALL 
    SELECT p.lvl,  
            x.value('local-name(.)','NVARCHAR(MAX)'),  
            p.Name, 
            p.Position, 
            CAST(N'Attribute' AS NVARCHAR(20)),  
            p.FullPath + N'/@' + x.value('local-name(.)','NVARCHAR(MAX)'),  
            p.XPath + N'/@' + x.value('local-name(.)','NVARCHAR(MAX)'),  
            1, 
            SPACE(2 * p.lvl - 1) + N'|' + REPLICATE('-', 1)  
            + N'@' + x.value('local-name(.)','NVARCHAR(MAX)'),  
            x.value('.','NVARCHAR(MAX)'),  
            NULL,  
            p.Sort,  
            p.ID + 1  
    FROM cte p  
    CROSS APPLY this.nodes('/*/@*') a(x)  
    )  
    SELECT ROW_NUMBER() OVER(ORDER BY Sort, ID) AS ID,  
        ParentName, ParentPosition,Depth, NodeName, Position,   
        NodeType, FullPath, XPath, TreeView, Value, XMLData
    FROM cte2
    WHERE Value = '12345';
    

    T-SQL, short version

    DECLARE @DocHandle INT;
    EXEC sp_xml_preparedocument @DocHandle OUTPUT, @xml;
    
    --SELECT * FROM OPENXML(@DocHandle,'/*');
    
    ;WITH rs AS
    (
       SELECT * FROM OPENXML(@DocHandle,'/*')
    ), cte AS
    (
       -- anchor
        SELECT id
          ,ParentID
          --, nodetype
          , [text]
          ,CAST(id AS VARCHAR(100)) AS [Path]
          ,CAST('/' + rs.localname AS VARCHAR(1000)) 
          + N'[' 
            + CAST(ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS NVARCHAR)  
            + N']' AS [XPath]
        FROM rs
        WHERE ParentID IS NULL
        UNION ALL
        --recursive member
        SELECT t.id
          ,t.ParentID
          --, nodetype = (SELECT nodetype FROM rs WHERE id = t.ParentID)
          , t.[text]
          , CAST(a.[Path] + ',' + CAST( t.ID AS VARCHAR(100)) AS VARCHAR(100)) AS [Path]
          , CAST(a.[XPath] + '/' + IIF(t.nodetype = 2, '@', '')
             + t.localname AS VARCHAR(1000)) 
             + N'['
            + CAST(ROW_NUMBER() OVER(PARTITION BY t.localname ORDER BY (SELECT 1)) AS NVARCHAR)
            + N']' AS [XPath]
        FROM rs AS t
          INNER JOIN cte AS a ON t.ParentId = a.id
    )
    SELECT ID, ParentID, [Path]
       , REPLACE([XPath],'[1]/#text[1]','') AS XPath, [text] AS [Value]
    FROM cte
    WHERE [text] IS NOT NULL
        AND CAST([text] AS VARCHAR(30)) = '12345'
    ORDER BY [Path];
    
    EXEC sp_xml_removedocument @DocHandle;
    

    Output

    +----+----------+---------------------+--------------------------------------------------------------+-------+
    | ID | ParentID |        Path         |                            XPath                             | Value |
    +----+----------+---------------------+--------------------------------------------------------------+-------+
    | 35 |       11 |             0,11,35 | /Parent[1]/TestKey1                                          | 12345 |
    | 43 |       25 | 0,12,13,20,23,25,43 | /Parent[1]/Answers[1]/AnswersList[1]/Entry[2]/value[1]/@Text | 12345 |
    | 31 |        6 |          0,2,3,6,31 | /Parent[1]/DataExtensions[1]/DataItem[1]/value               | 12345 |
    | 44 |       26 |             0,26,44 | /Parent[1]/TestKey2                                          | 12345 |
    | 46 |       28 |             0,28,46 | /Parent[1]/TestKey4                                          | 12345 |
    +----+----------+---------------------+--------------------------------------------------------------+-------+