Search code examples
sql-serverxmlxpathsqlxml

How to recursively get XPath of a node using SQL Server?


I'm tired of looking at possibly the ugliest SQL statement I've ever built and need your help. I am searching through an XML document for various elements and want to see their XPaths. The query below works by brute force, but I cannot come up with a way to create a function or CTE that will properly support N levels.

declare @article xml = '<article>
  <front>
    <article-meta>
      <title-group>
        <article-title>Update on ...</article-title>
      </title-group>
    </article-meta>
  </front>
  <back>
    <ref-list>
      <ref id="R1">
        <citation citation-type="journal">
          <article-title>Retrospective study of ...</article-title>
        </citation>
      </ref>
    </ref-list>
  </back>
</article>'

SELECT
        Cast(T.r.query('local-name(parent::*/parent::*/parent::*/parent::*/parent::*/parent::*)') AS varchar(max)) + '/' +
        Cast(T.r.query('local-name(parent::*/parent::*/parent::*/parent::*/parent::*)') AS varchar(max)) + '/' +
        Cast(T.r.query('local-name(parent::*/parent::*/parent::*/parent::*)') AS varchar(max)) + '/' +
        Cast(T.r.query('local-name(parent::*/parent::*/parent::*)') AS varchar(max)) + '/' +
        Cast(T.r.query('local-name(parent::*/parent::*)') AS varchar(max)) + '/' +
        Cast(T.r.query('local-name(parent::*)') AS varchar(max)) AS ThePath,
        Cast(T.r.query('local-name(.)') AS varchar(max)) AS TheElement,
        T.r.query('.') AS TheXml
FROM @article.nodes('//article-title') T(r)

Result:

ThePath                                 TheElement
//article/front/article-meta/title-group    article-title
/article/back/ref-list/ref/citation         article-title

What I really want:

SELECT
  x.RowId,
  dbo.GetXPath(T.r.query('.')) AS ThePath, -- <---- Magic function goes here
    T.r.query('.') AS TheXml
FROM dbo.InputFormatXml x
  JOIN dbo.InputFormat f
    ON F.InputFormatId = x.InputFormatId
CROSS APPLY TheData.nodes('//article-title') T(r)
WHERE F.Description = 'NLM';

Solution

  • DECLARE @idoc int;
    
    EXEC sp_xml_preparedocument @idoc OUTPUT, @article; 
    
    SELECT ISNULL(id,'') id, parentid, localname
    INTO #nodetree
    FROM OPENXML(@idoc,'/',3)
    WHERE nodetype = 1;
    
    EXEC sp_xml_removedocument @idoc;
    
    ALTER TABLE #nodetree ADD PRIMARY KEY (id);
    
    WITH cte AS (
      SELECT
        parentid
       ,CAST('/' AS varchar(max)) + localname AS xpath
      FROM #nodetree WHERE localname = 'article-title'
      UNION ALL
      SELECT
        parent.parentid
       ,CAST('/' AS varchar(max)) + localname + xpath
      FROM cte AS node
      INNER JOIN #nodetree parent on parent.id = node.parentid
    )
    SELECT xpath
    FROM cte
    WHERE parentid IS NULL