Search code examples
marklogic, marklogic-9

MarkLogic additional-query and searchable-expression issue


I have both a searchable-expression and an additional-query in my options file. The additional-query contains a cts:field-value-query, and when it is present I get empty results, even though I know there are documents that match both the searchable-expression and the additional-query. When I take out the cts:field-value-query, the results come back as expected. I am including the options file and the query plan for both cases below. I don't understand what is causing the issue.

(: Search API options for the query below.
   - additional-query: ANDs a collection restriction (OncoWatch) with a
     field-value-query on the wos_doctype field (value "Article" or "Review").
   - searchable-expression: the path that is searched (the title element inside
     the envelope).
   - term: options applied to the word query generated from the query text.
   - return-plan: asks for the query plan to be included in the response.
   NOTE: the search: prefix is assumed to be bound by a Search API module import
   that is not shown in this snippet. :)
let $options :=
<options xmlns="http://marklogic.com/appservices/search">
    <additional-query>
         <cts:and-query xmlns:cts="http://marklogic.com/cts">
            <cts:collection-query>
                <cts:uri>OncoWatch</cts:uri>
            </cts:collection-query>
             <cts:field-value-query>
                <cts:field>wos_doctype</cts:field>
                <cts:text>Article</cts:text>
                <cts:text>Review</cts:text>
            </cts:field-value-query>
        </cts:and-query>
    </additional-query>
    <searchable-expression xmlns:es="http://marklogic.com/entity-services"
                           xmlns:wos_dps="http://clarivate.com/schema/wok5.27/public/FullRecord">
      /es:envelope/es:raw/wos_dps:REC/wos_dps:static_data/wos_dps:summary/wos_dps:titles/wos_dps:title
    </searchable-expression>
    <term>
        <term-option>case-insensitive</term-option>
        <term-option>punctuation-insensitive</term-option>
        <term-option>whitespace-insensitive</term-option>
        <term-option>wildcarded</term-option>
    </term>
    <return-plan>true</return-plan>
</options>

(: Query text: the double quotes inside the string make it a phrase search. :)
let $q := '"pancreatic cancer"'


return
(: Run the search with the query text and the options defined above. :)
search:search($q, $options)

The query plan returned in the search response looks like this:

    <search:response snippet-format="snippet" total="0" start="1" page-length="10" xmlns:search="http://marklogic.com/appservices/search">
  <search:plan>
    <qry:query-plan xmlns:qry="http://marklogic.com/cts/query">
      <qry:expr-trace>impl:apply-search(map:map(&lt;map:map xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" .../&gt;), "xdmp:plan", fn:false())</qry:expr-trace>
      <qry:info-trace>Analyzing path for search: fn:collection()/es:envelope/es:raw/wos_dps:REC/wos_dps:static_data/wos_dps:summary/wos_dps:titles/wos_dps:title</qry:info-trace>
      <qry:info-trace>Step 1 is searchable: fn:collection()</qry:info-trace>
      <qry:info-trace>Step 2 is searchable: es:envelope</qry:info-trace>
      <qry:info-trace>Step 3 is searchable: es:raw</qry:info-trace>
      <qry:info-trace>Step 4 is searchable: wos_dps:REC</qry:info-trace>
      <qry:info-trace>Step 5 is searchable: wos_dps:static_data</qry:info-trace>
      <qry:info-trace>Step 6 is searchable: wos_dps:summary</qry:info-trace>
      <qry:info-trace>Step 7 is searchable: wos_dps:titles</qry:info-trace>
      <qry:info-trace>Step 8 is searchable: wos_dps:title</qry:info-trace>
      <qry:info-trace>Path is fully searchable.</qry:info-trace>
      <qry:info-trace>Gathering constraints.</qry:info-trace>
      <qry:info-trace>Search query contributed 3 constraints: cts:and-query((cts:word-query("pancreatic cancer", ("case-insensitive","punctuation-insensitive","whitespace-insensitive","wildcarded","lang=en"), 1), cts:collection-query("OncoWatch"), cts:field-value-query("wos_doctype", ("Article", "Review"), ("lang=en"), 1)), ())</qry:info-trace>
      <qry:partial-plan>
    <qry:term-query weight="1">
      <qry:key>17396426271722676659</qry:key>
    </qry:term-query>
      </qry:partial-plan>
      <qry:partial-plan>
    <qry:term-query weight="0">
      <qry:key>16172058720999549713</qry:key>
      <qry:annotation>collection(OncoWatch)</qry:annotation>
    </qry:term-query>
      </qry:partial-plan>
      <qry:partial-plan>
    <qry:or-two-queries>
      <qry:term-query weight="1">
        <qry:key>8492163112599871394</qry:key>
      </qry:term-query>
      <qry:term-query weight="1">
        <qry:key>10427204889894421302</qry:key>
      </qry:term-query>
    </qry:or-two-queries>
      </qry:partial-plan>
      <qry:info-trace>Executing search.</qry:info-trace>
      <qry:ordering/>
      <qry:final-plan>
    <qry:and-query>
      <qry:term-query weight="0">
        <qry:key>3030157765137304984</qry:key>
        <qry:annotation>descendant(doc-root(element(es:envelope),doc-kind(document)))</qry:annotation>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>14623301529635238289</qry:key>
        <qry:annotation>descendant(element-child(es:envelope/es:raw))</qry:annotation>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>2513953156802156363</qry:key>
        <qry:annotation>descendant(element-child(es:raw/wos_dps:REC))</qry:annotation>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>15620627614067983352</qry:key>
        <qry:annotation>descendant(element-child(wos_dps:REC/wos_dps:static_data))</qry:annotation>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>14267880419025656818</qry:key>
        <qry:annotation>descendant(element-child(wos_dps:static_data/wos_dps:summary))</qry:annotation>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>1256980818223445232</qry:key>
        <qry:annotation>descendant(element-child(wos_dps:summary/wos_dps:titles))</qry:annotation>
      </qry:term-query>
      <qry:or-two-queries>
        <qry:term-query weight="0">
          <qry:key>7002325115516888131</qry:key>
          <qry:annotation>element-child(wos_dps:titles/wos_dps:title)</qry:annotation>
        </qry:term-query>
        <qry:term-query weight="0">
          <qry:key>14914226620346018347</qry:key>
          <qry:annotation>link-child(descendant(element-child(wos_dps:titles/wos_dps:title)))</qry:annotation>
        </qry:term-query>
      </qry:or-two-queries>
      <qry:term-query weight="1">
        <qry:key>17396426271722676659</qry:key>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>16172058720999549713</qry:key>
        <qry:annotation>collection(OncoWatch)</qry:annotation>
      </qry:term-query>
      <qry:or-two-queries>
        <qry:term-query weight="1">
          <qry:key>8492163112599871394</qry:key>
        </qry:term-query>
        <qry:term-query weight="1">
          <qry:key>10427204889894421302</qry:key>
        </qry:term-query>
      </qry:or-two-queries>
    </qry:and-query>
      </qry:final-plan>
      <qry:info-trace>Selected 1264 fragments to filter</qry:info-trace>
      <qry:result estimate="1264"/>
    </qry:query-plan>
  </search:plan>
  <search:qtext>"pancreatic cancer"</search:qtext>
  <search:metrics>
    <search:query-resolution-time>PT0.183458S</search:query-resolution-time>
    <search:total-time>PT0.186923S</search:total-time>
  </search:metrics>
</search:response>

And here is the plan when I take out the cts:field-value-query:

<search:plan xmlns:search="http://marklogic.com/appservices/search">
  <qry:query-plan xmlns:qry="http://marklogic.com/cts/query">
    <qry:expr-trace>impl:apply-search(map:map(&lt;map:map xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" .../&gt;), "xdmp:plan", fn:false())</qry:expr-trace>
    <qry:info-trace>Analyzing path for search: fn:collection()/es:envelope/es:raw/wos_dps:REC/wos_dps:static_data/wos_dps:summary/wos_dps:titles/wos_dps:title</qry:info-trace>
    <qry:info-trace>Step 1 is searchable: fn:collection()</qry:info-trace>
    <qry:info-trace>Step 2 is searchable: es:envelope</qry:info-trace>
    <qry:info-trace>Step 3 is searchable: es:raw</qry:info-trace>
    <qry:info-trace>Step 4 is searchable: wos_dps:REC</qry:info-trace>
    <qry:info-trace>Step 5 is searchable: wos_dps:static_data</qry:info-trace>
    <qry:info-trace>Step 6 is searchable: wos_dps:summary</qry:info-trace>
    <qry:info-trace>Step 7 is searchable: wos_dps:titles</qry:info-trace>
    <qry:info-trace>Step 8 is searchable: wos_dps:title</qry:info-trace>
    <qry:info-trace>Path is fully searchable.</qry:info-trace>
    <qry:info-trace>Gathering constraints.</qry:info-trace>
    <qry:info-trace>Search query contributed 2 constraints: cts:and-query((cts:word-query("pancreatic cancer", ("case-insensitive","punctuation-insensitive","whitespace-insensitive","wildcarded","lang=en"), 1), cts:collection-query("OncoWatch")), ())</qry:info-trace>
    <qry:partial-plan>
      <qry:term-query weight="1">
    <qry:key>17396426271722676659</qry:key>
      </qry:term-query>
    </qry:partial-plan>
    <qry:partial-plan>
      <qry:term-query weight="0">
    <qry:key>16172058720999549713</qry:key>
    <qry:annotation>collection(OncoWatch)</qry:annotation>
      </qry:term-query>
    </qry:partial-plan>
    <qry:info-trace>Executing search.</qry:info-trace>
    <qry:ordering/>
    <qry:final-plan>
      <qry:and-query>
    <qry:term-query weight="0">
      <qry:key>3030157765137304984</qry:key>
      <qry:annotation>descendant(doc-root(element(es:envelope),doc-kind(document)))</qry:annotation>
    </qry:term-query>
    <qry:term-query weight="0">
      <qry:key>14623301529635238289</qry:key>
      <qry:annotation>descendant(element-child(es:envelope/es:raw))</qry:annotation>
    </qry:term-query>
    <qry:term-query weight="0">
      <qry:key>2513953156802156363</qry:key>
      <qry:annotation>descendant(element-child(es:raw/wos_dps:REC))</qry:annotation>
    </qry:term-query>
    <qry:term-query weight="0">
      <qry:key>15620627614067983352</qry:key>
      <qry:annotation>descendant(element-child(wos_dps:REC/wos_dps:static_data))</qry:annotation>
    </qry:term-query>
    <qry:term-query weight="0">
      <qry:key>14267880419025656818</qry:key>
      <qry:annotation>descendant(element-child(wos_dps:static_data/wos_dps:summary))</qry:annotation>
    </qry:term-query>
    <qry:term-query weight="0">
      <qry:key>1256980818223445232</qry:key>
      <qry:annotation>descendant(element-child(wos_dps:summary/wos_dps:titles))</qry:annotation>
    </qry:term-query>
    <qry:or-two-queries>
      <qry:term-query weight="0">
        <qry:key>7002325115516888131</qry:key>
        <qry:annotation>element-child(wos_dps:titles/wos_dps:title)</qry:annotation>
      </qry:term-query>
      <qry:term-query weight="0">
        <qry:key>14914226620346018347</qry:key>
        <qry:annotation>link-child(descendant(element-child(wos_dps:titles/wos_dps:title)))</qry:annotation>
      </qry:term-query>
    </qry:or-two-queries>
    <qry:term-query weight="1">
      <qry:key>17396426271722676659</qry:key>
    </qry:term-query>
    <qry:term-query weight="0">
      <qry:key>16172058720999549713</qry:key>
      <qry:annotation>collection(OncoWatch)</qry:annotation>
    </qry:term-query>
      </qry:and-query>
    </qry:final-plan>
    <qry:info-trace>Selected 1337 fragments to filter</qry:info-trace>
    <qry:result estimate="1337"/>
  </qry:query-plan>
</search:plan>

Solution

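  • The query plan shows that fragments were selected from the indexes in both cases (1264 with the field-value-query, 1337 without it), so I think your field-value-query did match something. Since the response still reports total="0", those fragments must have been excluded by filtering.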

    That is probably because of your searchable expression. It points to a very specific element deep inside your envelope, and the values for your field likely come from elements outside that path.

    In case you are trying to influence search snippets, I'd recommend using the transform-results and preferred-matches features instead of the searchable expression. Use only a high-level element for the searchable expression, such as /es:envelope/es:raw or /es:envelope/es:instance, or simply omit it altogether.

    HTH!