Search code examples
marklogic

Marklogic Search options to restrict search for a JSON nested property


I have a json which has the following structure

{"scientist":{
         "username": "XXX",
         "name":"XXXX"
          ...
    },
    "registrar":{
      "username": "YYY",
         "name":"aaaaa"
          ...
    }
   }

I want to restrict the search to a nested property in the above JSON, e.g. to search only "registrar/username". I tried to use the Search API "container" constraint, but I was unable to get the search to work. The following is my code, which does not seem to work:

Is there any other mechanism for searching a nested JSON field?

```

(: Query text: a "username" constraint term ANDed with the "concept.registrar"
   container constraint. Note that the container constraint is given no
   value after its colon.
   NOTE(review): the "search" prefix used below must be bound by an import
   of the Search API module, which is not shown in this snippet. :)
let $search := "(username:WHITEMK) AND (concept.registrar: )"

(: Search options. Two constraints are declared:
     - "concept.registrar": a container constraint on the JSON property "registrar"
     - "username": a value constraint on the JSON property "username",
       with the case-insensitive and wildcarded term options.
   The return-* flags are all switched off, and four search-option values
   are set (unfiltered, unfaceted, format-json, score-simple). :)
let $options := 
  <options xmlns="http://marklogic.com/appservices/search">
    <constraint name="concept.registrar">
      <container>
        <json-property>registrar</json-property>
      </container>
    </constraint>
    <constraint name="username">
      <value>
        <json-property>username</json-property>
        <term-option>case-insensitive</term-option>
        <term-option>wildcarded</term-option>
      </value>
    </constraint>
    <return-facets>false</return-facets>
    <return-values>false</return-values>
    <return-constraints>false</return-constraints>
    <return-frequencies>false</return-frequencies>
    <return-qtext>false</return-qtext>
    <search-option>unfiltered</search-option>
    <search-option>unfaceted</search-option>
    <search-option>format-json</search-option>
    <search-option>score-simple</search-option>
  </options>

(: Paging arguments handed to search:search: first result index and page length. :)
let $start := 1
let $page-length :=10000

(: Run the query text against the options above and return the search response. :)
return search:search($search, $options, $start, $page-length)

```

Thanks, Ravi


Solution

  • I was able to get a more generic solution, without adding nested fields, by building a custom search constraint whose parser splits the nested JSON property path and builds the search query using cts:json-property-scope-query.
    For example, let us say you want to search on a deeply nested property (five levels in this case):

    let $search := "(concept:orfs.aminoAcids.predictedMatureSeqs.domains.heavyChainIsoType:igg1)"
    

    Using the custom parser, I was able to transform this into the following query:

    <cts:json-property-scope-query xmlns:cts="http://marklogic.com/cts">
    <cts:property>orfs</cts:property>
    <cts:json-property-scope-query>
    <cts:property>aminoAcids</cts:property>
    <cts:json-property-scope-query>
    <cts:property>predictedMatureSeqs</cts:property>
    <cts:json-property-scope-query>
    <cts:property>domains</cts:property>
    <cts:json-property-scope-query>
    <cts:property>heavyChainIsoType</cts:property>
    <cts:word-query>
    <cts:text xml:lang="en">igg1</cts:text>
    <cts:option>case-insensitive</cts:option>
    <cts:option>punctuation-insensitive</cts:option>
    <cts:option>whitespace-insensitive</cts:option>
    <cts:option>wildcarded</cts:option>
    </cts:word-query>
    </cts:json-property-scope-query>
    </cts:json-property-scope-query>
    </cts:json-property-scope-query>
    </cts:json-property-scope-query>
    </cts:json-property-scope-query>
    

    If anyone is interested, the following is the code for the custom parser:

    xquery version "1.0-ml";
    module namespace gbrsso="http://marklogic.com/gbrs/modules";
    import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";
    (:
       This module implements a custom search constraint whose parser also
       handles nested JSON property paths,
       e.g. concept:registrar.username:mamidrx
    :)
    
    (:~
     : Parse function for the custom constraint; MarkLogic invokes it when it
     : sees the custom constraint in the query text.
     :
     : The text carried by $right is expected to look like "path:value", where
     : "path" is a dot-separated chain of JSON property names
     : (e.g. "registrar.username:mamidrx"). The result is a word query on
     : "value" wrapped in a cts:json-property-scope-query for the last property
     : of the path; any leading properties are handed to gbrsso:buildQuery.
     : When the text has no ":" (or no usable property name) a plain word
     : query on the text is returned.
     :
     : Only the FIRST ":" separates the path from the value, so a value that
     : itself contains colons (e.g. "created.time:10:30") is searched in full
     : rather than being cut down to the part after the last colon.
     :
     : @param $constraint-qtext the constraint text as typed in the query
     :                          (only logged)
     : @param $right            the parsed right-hand side; its cts:text holds
     :                          the "path:value" string. fn:string raises an
     :                          error if more than one text node is present.
     : @return the query serialized as a cts:query element
     :)
    declare function gbrsso:parse($constraint-qtext as xs:string, $right as schema-element(cts:query)) as schema-element(cts:query){
      let $rawText := fn:string($right//cts:text/text())
      let $log := xdmp:log("Constraint-qtext : " || $constraint-qtext)
      let $log := xdmp:log("$right : " || $rawText)

      (: Split on the first colon only: left side is the property path, right side the search term. :)
      let $hasPath := fn:contains($rawText, ":")
      let $path := if ($hasPath) then fn:substring-before($rawText, ":") else ""
      let $queryText := if ($hasPath) then fn:substring-after($rawText, ":") else $rawText
      let $log := xdmp:log("$queryText : " || $queryText)

      (: The same word query is used whether or not the term is scoped to a property. :)
      let $wordQuery :=
        cts:word-query($queryText, ("case-insensitive", "wildcarded", "punctuation-insensitive", "whitespace-insensitive"))

      (: Drop empty segments (".a", "a..b", "a.") so that no scope query is built for an empty property name. :)
      let $properties := fn:tokenize($path, "\.")[. ne ""]
      let $leafProperty := $properties[fn:last()]
      let $outerProperties := $properties[fn:position() lt fn:last()]

      let $query :=
        if (fn:empty($properties)) then
          (: no property path at all: unscoped word query :)
          $wordQuery
        else
          let $leafQuery := cts:json-property-scope-query($leafProperty, $wordQuery)
          return
            if (fn:empty($outerProperties)) then
              (: single-level path: the leaf scope query is the whole query :)
              $leafQuery
            else
              (: nested path: gbrsso:buildQuery is defined elsewhere in this module;
                 presumably it wraps the leaf query in one scope query per outer
                 property - confirm against its definition :)
              gbrsso:buildQuery($leafQuery, $outerProperties)

      (: Placing the cts:query value inside an element constructor serializes it
         to XML, which is what the schema-element(cts:query) return type needs. :)
      return <root>{$query}</root>/*
    };