Search code examples
search xquery marklogic marklogic-8 search-snippet

MarkLogic diacritic-insensitive snippet


For now I'm using this code to generate a snippet from a JSON document that I get back from a MarkLogic search.

xquery version "1.0-ml";
module namespace searchlib="http://ihs.com/lib/searchlib";
import module namespace search="http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy"; 
import module namespace json="http://marklogic.com/xdmp/json" at "/MarkLogic/json/json.xqy";

(:~
 : Produces the highlighted snippet matches for one JSON document.
 :
 : The document is converted with json:transform-from-json, the query text is
 : parsed with search:parse (no options supplied), and both are handed to
 : search:snippet together with the transform-results options below.
 :
 : @param $docc   JSON document to snippet
 : @param $words  query text passed to search:parse
 : @return every search:match element found in the search:snippet output
 :)
declare function searchlib:get-snippet($docc, $words) {
  let $snippet-options :=
    <transform-results apply="snippet" xmlns="http://marklogic.com/appservices/search">
      <max-snippet-chars>255</max-snippet-chars>
    </transform-results>
  let $xml-doc := json:transform-from-json($docc)
  let $parsed-query := search:parse($words)
  (: the snippet output is wrapped so matches can be selected with a descendant step :)
  let $wrapped :=
    <result>{ search:snippet($xml-doc, $parsed-query, $snippet-options) }</result>
  return $wrapped//search:match
};

When performing search I'm using:

// Query on the property's values, with the case-insensitive and
// diacritic-insensitive options passed as the third argument.
cts.jsonPropertyValueQuery(fieldname, values, 
                                             ['case-insensitive', 'diacritic-insensitive'])

So the search is diacritic-insensitive and returns good results, but with search:snippet I can't find a way to pass the diacritic-insensitive option the way I do with cts.jsonPropertyValueQuery.

In documentation I can see this in description

Options to define the search grammar and control the search. See description for $options for the function search:search. Note that you cannot specify the apply attribute on the transform-results option with search:snippet; to use a different snippetting function, use search:search or search:resolve instead.

But the function signature is:

search:snippet(
   $result as node(),
   $cts-query as schema-element(cts:query),
   [$options as element(search:transform-results)?]
) as element(search:snippet)

So does this mean I can't pass other options to search:snippet? Or is there an option to do this?

I'm testing it with chávez and the search returns results, but snippets are generated properly only for documents containing an exact match. That means that the document

Chavez did something

Will not get a highlight on Chavez, and

Chávez did something

Will get a highlight.

Thanks in advance!


Solution

  • The problem was that the options have to be passed to search:parse, not to search:snippet:

    xquery version "1.0-ml";
    module namespace searchlib="http://ihs.com/lib/searchlib";
    import module namespace search="http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy"; 
    import module namespace json="http://marklogic.com/xdmp/json" at "/MarkLogic/json/json.xqy";
    
    (:~
     : Produces the highlighted snippet matches for one JSON document, parsing
     : the query text with case-insensitive and diacritic-insensitive term options.
     :
     : The term options are given to search:parse, so the query handed to
     : search:snippet already carries them.
     :
     : @param $docc   JSON document to snippet; converted with json:transform-from-json
     : @param $words  query text passed to search:parse
     : @return every search:match element found in the search:snippet output
     :)
    declare function searchlib:get-snippet($docc, $words) {
      let $parse-options :=
        <options xmlns="http://marklogic.com/appservices/search">
          <term>
            <term-option>case-insensitive</term-option>
            <term-option>diacritic-insensitive</term-option>
          </term>
        </options>
      let $snippet-options :=
        <transform-results apply="snippet" xmlns="http://marklogic.com/appservices/search">
          <max-snippet-chars>255</max-snippet-chars>
        </transform-results>
      let $xml-doc := json:transform-from-json($docc)
      let $parsed-query := search:parse($words, $parse-options, "cts:query")
      (: the snippet output is wrapped so matches can be selected with a descendant step :)
      let $wrapped :=
        <result>{ search:snippet($xml-doc, $parsed-query, $snippet-options) }</result>
      return $wrapped//search:match
    };
    

    passing

    <term-option>diacritic-insensitive</term-option>
    

    to search:parse made it work.

    Here is explanation from MarkLogic:

    The search:snippet() function allows you to extract matching text and returns the matches wrapped in a containing node, with highlights tagged. However, to allow the search:snippet to extract the correct text, the cts:query() that is passed to the snippet should match the sequence of values. For search:snippet, cts:query is typically a result of a call to search:parse. The search:parse() function parses query text according to given options and returns the appropriate cts:query XML.