Search code examples
marklogic

MarkLogic - How to hide element nodes


I am trying to exclude an unwanted node from my documents, but I could not figure out how to make it work. For example, I have multiple documents like this one:

<doc:Record Type="testdata" xmlns:doc="http://db/test/record">
  <meta:Metadata xmlns:meta="http://db/test/record/meta">

    <meta:docid>09266</meta:docid>
    <meta:Collections>
       <meta:Collection>Universities</meta:Collection>
    </meta:Collections>
    <meta:NonFundingSources>
      <meta:TotalNonDODFunding>0 </meta:TotalNonDODFunding>
    </meta:NonFundingSources>
  </meta:Metadata>
</doc:Record>

The result should hide the "NonFundingSources" element and return all other element nodes.

 <doc:Record Type="testdata" xmlns:doc="http://db/test/record">
   <meta:Metadata xmlns:meta="http://db/test/record/meta">

     <meta:docid>09266</meta:docid>
     <meta:Collections>
       <meta:Collection>Universities</meta:Collection>
     </meta:Collections>
  </meta:Metadata>
</doc:Record>

In my code I retrieve all elements, but I could not figure out how to hide (remove) the unwanted element node before saving to the file system.

(: Exports every matching document to /report/<docid>.xml on the file system.
   NOTE: this writes each document in full; it does not yet remove the
   unwanted meta:NonFundingSources element. :)

(: Bind the prefix used in the path expression below; the xmlns:meta
   declaration inside the stored documents is not visible to the query. :)
declare namespace meta = "http://db/test/record/meta";

(: Collect the URIs of all documents matching the collection, word and
   date-range constraints.
   NOTE(review): the range bounds are passed as strings; this presumably
   matches the scalar type of the "crd" field range index - confirm. :)
let $uris := cts:uris((),
                  (),
                  cts:and-query((
                          cts:collection-query("/ure/univs"),
                          cts:field-word-query("dc",("a","be","x")),
                          cts:field-range-query("crd",">=","2011-01-01"),
                          cts:field-range-query("crd","<","2012-01-01")
                  ))

                 )

   for $uri in $uris

   (: cts:uris returns URI strings, not nodes, so the document has to be
      loaded before a path step can be applied to it or it can be saved. :)
   let $doc := fn:doc($uri)

   let $docNumber := fn:data($doc//meta:docid)

   (: xdmp:save expects a node as its second argument, not the URI string. :)
   return xdmp:save(fn:concat("/report/",$docNumber,".xml") ,$doc ) 

Solution

  • One easy way to prune the element would be to apply an XSLT that uses the identity template to copy most content and an empty template matching on the element(s) that you want to remove:

    (: Bind the meta prefix for the XQuery path expression further down.
       The xmlns:meta attribute on the stylesheet element only applies inside
       that element constructor, so without this declaration
       $doc//meta:docid has an unbound prefix. Namespace declarations must
       precede variable declarations in the prolog. :)
    declare namespace meta = "http://db/test/record/meta";

    (: Identity-transform stylesheet that copies everything except
       meta:NonFundingSources elements and their descendants. :)
    declare variable $XSLT := 
      <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
      xmlns:meta="http://db/test/record/meta">
        <xsl:output indent="yes"/>
        <!--by default, every attribute and node is copied to the output-->
        <xsl:template match="@*|node()">
          <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
           </xsl:copy>
        </xsl:template>
        <!--Whenever this element is matched, no output is generated for it, 
            or any of its descendants -->
        <xsl:template match="meta:NonFundingSources"/>
    </xsl:stylesheet>;
    
    (: Search for the documents themselves (rather than their URIs) so each
       result can be transformed and saved directly. :)
    for $doc in cts:search(doc(),  
                            cts:and-query((
                              cts:collection-query("/ure/univs"),
                              cts:field-word-query("dc",("a","be","x")),
                              cts:field-range-query("crd",">=","2011-01-01"),
                              cts:field-range-query("crd","<","2012-01-01")
                            ))
                  )
    
    (: The document id becomes the output file name. :)
    let $docNumber := $doc//meta:docid/string()
    (: transform the document, removing content that we don't want :)
    let $redacted := xdmp:xslt-eval($XSLT, $doc)
    return 
      (: Write the pruned copy to the file system; the stored document is
         not modified. :)
      xdmp:save(fn:concat("/report/",$docNumber,".xml"), $redacted )
    

    MarkLogic also has element level security and redaction features, which could be used to hide or redact content.