Search code examples
xmlxpathxqueryexist-db

XQuery - Extract distinct elments by attribute


I have an XML document which contains a list of customers and their orders and items in those orders. I want to get a list containing distinct values of customers. The customer's id is the attribute of "Name". Example XML:

<customerunion>
  <Customer Name="c2">
    <Order OrderNumber="o1">
      <Item Description="xyz">i10</Item>
    </Order>
  </Customer>

 <Customer Name="c3">
    <Order OrderNumber="o1">
      <Item Description="qwe">i11</Item>
      <Item Description="mnb">i12</Item>
    </Order>
    <Order OrderNumber="o2">
      <Item Description="cfg">i5</Item>
    </Order>
 </Customer>

 <Customer Name="c1">
    <Order OrderNumber="o1">
      <Item Description="abc">i1</Item>
      <Item Description="def">i2</Item>
    </Order>
 </Customer>

 <Customer Name="c4">
    <Order OrderNumber="o1">
      <Item Description="abc">i1</Item>
    </Order>
    <Order OrderNumber="o2">
      <Item Description="def">i2</Item>
    </Order>
 </Customer>

 <Customer Name="c2">
    <Order OrderNumber="o1">
      <Item Description="milk"/>
 </Order>
 </Customer>

 <Customer Name="c10">
    <Order OrderNumber="o1">
      <Item Description="chips greek yogort avacado"/>
    </Order>
 </Customer>

 <Customer Name="c4">
    <Order OrderNumber="o1">
      <Item Description="tea bags cheese"/>
    </Order>
 </Customer>

 <Customer Name="c11">
    <Order OrderNumber="o1">
      <Item Description=" milk sushi Notebook "/>
    </Order>
 </Customer>

 <Customer Name="c11">
    <Order OrderNumber="o2">
      <Item Description="grape tomato"/>
    </Order>
 </Customer>

 <Customer Name="c12">
    <Order OrderNumber="o1">
      <Item Description="bread"/>
    </Order>
 </Customer>
</customerunion>

and below is the result I want to get:

<customerlib>
  <Customer Name="c1">
    <Order OrderNumber="o1">
      <Item Description="abc">i1</Item>
      <Item Description="def">i2</Item>
    </Order>
  </Customer>

  <Customer Name="c2">
    <Order OrderNumber="o1">
      <Item Description="xyz">i10</Item>
    </Order>
  </Customer>

  <Customer Name="c3">
    <Order OrderNumber="o1">
      <Item Description="qwe">i11</Item>
      <Item Description="mnb">i12</Item>
    </Order>
    <Order OrderNumber="o2">
      <Item Description="cfg">i5</Item>
    </Order>
  </Customer>
  <Customer Name="c4">
    <Order OrderNumber="o1">
      <Item Description="abc">i1</Item>
    </Order>
    <Order OrderNumber="o2">
      <Item Description="def">i2</Item>
    </Order>
  </Customer>
  <Customer Name="c10">
    <Order OrderNumber="o1">
      <Item Description="chips">i1</Item>
      <Item Description="greek yogort">i2</Item>
      <Item Description="avacado">i3</Item>
    </Order>
  </Customer>

  <Customer Name="c11">
    <Order OrderNumber="o1">
      <Item Description="milk">i1</Item>
      <Item Description="sushi">i2</Item>
      <Item Description="Notebook">i3</Item>
    </Order>
    <Order OrderNumber="o2">
      <Item Description="grape tomato">i1</Item>
    </Order>
  </Customer>

  <Customer Name="c12">
    <Order OrderNumber="o1">
      <Item Description="bread">i1</Item>
    </Order>
  </Customer>
</customerlib>

I used the following XQuery:

let $customer := doc("customerunion.xml")/customerunion/Customer
let $allcustomername := doc("customerunion.xml")/Customer/@Name
for $uniquecustomer in distinct-values($customer/@Name)
let $order := distinct-values($customer[Name=$uniquecustomer]/Order)
let $item := distinct-values($customer[Name=$uniquecustomer]/Order/Item)
where $uniquecustomer = $allcustomername
return 
 <customerlib>
    {$uniquecustomer/..}
    {
      for $o in $order
      return $o
    }
    {
      for $i in $item
      return $i
    }
 </customerlib>

However, I got no results from running the query. Can you help me to modify to the XQuery to get the desired result?


Solution

  • If I understand you correctly it seems to me that you are asking to group all of the Orders into distinct Customers?

    If so, in eXist-db you can use the group by expression from XQuery 3.0. So the following query should work for you:

    <customerlib>
    {
        let $customers := doc("customerunion.xml")
        return
            for $customer in $customers/customersunion/Customer
            group by $customer-name := $customer/@Name
            order by substring-after($customer-name, "c") cast as xs:integer
            return
                <Customer Name="{$customer-name}">
                {
                    $customers/customersunion/Customer[@Name eq $customer-name]/Order
                }
                </Customer>
    }
    </customerlib>
    

    I have also added an order by expression, as the results that you showed were ordered. However this works based on the fact that your Customer Names are always of the format cN where N is some number. If this is not the case, you will need to adjust or remove that line from the query.