I have a strange issue with cts:near-query. Given this sample document:
(: Sample document used by the queries in this post: a <titles> element
   holding six <title> children, each tagged with a "type" attribute. :)
let $xml :=
<titles count="6">
<title type="source">ASIA-PACIFIC JOURNAL OF CLINICAL ONCOLOGY</title>
<title type="source_abbrev">ASIA-PAC J CLIN ONCO</title>
<title type="abbrev_iso">Asia-Pac. J. Clin. Oncol.</title>
<title type="abbrev_11">ASIA-PAC J</title>
<title type="abbrev_29">ASIA-PAC J CLIN ONCOL</title>
<title type="item">Phase II study of cetuximab with irinotecan for KRAS wild-type colorectal cancer in Japanese patients</title>
</titles>
Initially I ran this query
(: $q1: restrict matching to <title> elements and look for any of the listed
   "phase ..." phrases. The word-query is given the "case-insensitive" and
   "wildcarded" options. :)
let $q1 :=
cts:element-query((xs:QName("title")),
cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
("case-insensitive", "wildcarded"))
)
return
(: Wrap each piece of matched text in $xml in a <b> element. :)
cts:highlight($xml,$q1, <b>{$cts:text}</b>)
I got the expected result, which is correct.
Next I ran this query, and again I got the correct result:
(: $q2: restrict matching to <title> elements and look for the terms
   "trial*", "study" and "studies*". The word-query is given the
   "case-insensitive" and "wildcarded" options. :)
let $q2 :=
cts:element-query((xs:QName("title")),
cts:word-query(("trial*", "study", "studies*"),
("case-insensitive", "wildcarded"))
)
return
(: Wrap each piece of matched text in $xml in a <b> element. :)
cts:highlight($xml,$q2, <b>{$cts:text}</b>)
Then I ran the following query with NEAR/0 (a near-query distance of 0), and I did not get any results:
(: $q3 (distance 0): combine the "phase ..." element-query and the
   study/trial element-query in a single cts:near-query.
   Arguments: the two sub-queries, a distance of 0, and the 'ordered' option.
   The post reports that this variant highlights nothing in the sample document. :)
let $q3 :=
cts:near-query((
cts:element-query((xs:QName("title")),
cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
("case-insensitive", "wildcarded")))
,
cts:element-query((xs:QName("title")),
cts:word-query(("trial*", "study", "studies*"),
("case-insensitive", "wildcarded")))
),
0,
('ordered'))
return
(: Wrap each piece of matched text in $xml in a <b> element. :)
cts:highlight($xml,$q3, <b>{$cts:text}</b>)
But when I ran the same query with NEAR/1, I did get the result. Why is that? Phrase 1 is immediately followed by phrase 2, so the NEAR distance should be 0, right?
(: $q3 (distance 1): the same pair of element-queries as the distance-0
   variant, combined in a cts:near-query with a distance of 1 and the
   'ordered' option.
   The post reports that this variant does produce the highlighted result. :)
let $q3 :=
cts:near-query((
cts:element-query((xs:QName("title")),
cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
("case-insensitive", "wildcarded")))
,
cts:element-query((xs:QName("title")),
cts:word-query(("trial*", "study", "studies*"),
("case-insensitive", "wildcarded")))
),
1,
('ordered'))
return
(: Wrap each piece of matched text in $xml in a <b> element. :)
cts:highlight($xml,$q3, <b>{$cts:text}</b>)
I believe MarkLogic indexes word distances starting with the anchor word at a location of 0, the subsequent token at a distance of 1, and so on. In order to query neighboring words, you need to use a near-query distance of 1. The queries in your examples are performing correctly.
To borrow from the MarkLogic cts:near-query documentation:
xquery version "1.0-ml";
(: Three statements separated by ";". Each one runs an ordered
   cts:near-query over the same sentence with a different word pair and
   distance; the trailing comments give the result stated in the post. :)

(: "now" followed by "the", distance 2. :)
let $x := <p>Now is the winter of our discontent</p>
return
cts:contains($x, cts:near-query(
("now", "the"),
2, "ordered"));
(: => returns true, "the" is 2 words from "now" :)

(: "now" followed by "is", distance 1 (adjacent words). :)
let $x := <p>Now is the winter of our discontent</p>
return
cts:contains($x, cts:near-query(
("now", "is"),
1, "ordered"));
(: => returns true, "is" is 1 word from "now" :)

(: Same adjacent pair, but distance 0. :)
let $x := <p>Now is the winter of our discontent</p>
return
cts:contains($x, cts:near-query(
("now", "is"),
0, "ordered"));
(: => returns false, "is" is 1 word from "now" :)