Search code examples
djangodjango-rest-frameworkdjango-querysetdjango-filterdjango-viewsets

Django ORM distinct on only a subset of the queryset


Working in Django Rest Framework (DRF), django-filter, and PostgreSQL, and having an issue with one of our endpoints.

Assume the following:

# models.py
class Company(models.Model):
    name = models.CharField(max_length=50)


class Venue(models.Model):
    company = models.ForeignKey(to="Company", on_delete=models.CASCADE)
    name = models.CharField(max_length=50)

# create some data

company1 = Company.objects.create(name="Proper Ltd")
company2 = Company.objects.create(name="MyCompany Ltd")

Venue.objects.create(name="Venue #1", company=company1)
Venue.objects.create(name="Venue #2", company=company1)
Venue.objects.create(name="Property #1", company=company2)
Venue.objects.create(name="Property #2", company=company2)

# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
    serializer_class = CompanyVenueSearchSerializer
    queryset = (
        Venue.objects.all()
        .select_related("company")
        .order_by("company__name")
    )
    permission_classes = (ReadOnly,)
    http_method_names = ["get"]
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = CompanyVenueListFilter
    pagination_class = None

# filterset
class CompanyVenueListFilter(filters.FilterSet):
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        return qs.filter(
            Q(name__icontains=value)
            | Q(company__name__icontains=value)
        )

    class Meta:
        model = Venue
        fields = [
            "name",
            "company__name",
        ]

# serializer
class CompanyVenueSearchSerializer(serializers.ModelSerializer):
    company_id = serializers.IntegerField(source="company.pk")
    company_name = serializers.CharField(source="company.name")
    venue_id = serializers.IntegerField(source="pk")
    venue_name = serializers.CharField(source="name")

    class Meta:
        model = Venue
        fields = (
            "company_id",
            "company_name",
            "venue_id",
            "venue_name",
        )

We now want to allow the user to filter the results by sending a query in the request, e.g. curl -X GET https://example.com/api/company/?text=pr.

The serializer result will look something like:

[
   {
      "company_id":1,
      "company_name":"Proper Ltd",
      "venue_id":1,
      "venue_name":"Venue #1"
   },
   {  // update ORM to exclude this dict
      "company_id":1,
      "company_name":"Proper Ltd",
      "venue_id":2,
      "venue_name":"Venue #1"
   },
   {
      "company_id":2,
      "company_name":"MyCompany Ltd",
      "venue_id":3,
      "venue_name":"Property #1"
   },
   {
      "company_id":2,
      "company_name":"MyCompany Ltd",
      "venue_id":4,
      "venue_name":"Property #1"
   }
]

Expected result:

Want to rewrite the ORM query so that if the filter ("pr") matches the venue__name, return all venues. But if the filter matches the company__name, only return it once, thus in the example above the second dict in the list would be excluded/removed.

Is this possible?


Solution

  • What you can do is to filter Company that matches name filtering and annotate them with the first related Venue and then combine it's results with the second requirement to return venue with name=value

    from django.db.models import OuterRef, Q, Subquery
    
    value = "pr"
    first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
    company_qs = Company.objects.filter(name__icontains=value).annotate(
        first_venue_id=Subquery(first_venue.values("id")[:1])
    )
    venue_qs = Venue.objects.filter(
        Q(name__icontains=value)
        | Q(id__in=company_qs.values_list("first_venue_id", flat=True))
    )
    

    The query executed when accessing values of venue_qs looks like

    SELECT
        "venues_venue"."id",
        "venues_venue"."company_id",
        "venues_venue"."name"
    FROM
        "venues_venue"
    WHERE
        (
            UPPER("venues_venue"."name"::TEXT) LIKE UPPER(% pr %)
            OR "venues_venue"."id" IN (
                SELECT
                    (
                        SELECT
                            U0."id"
                        FROM
                            "venues_venue" U0
                        WHERE
                            U0."company_id" IN (V0."id")
                        ORDER BY
                            U0."id" ASC
                        LIMIT
                            1
                    ) AS "first_venue_id"
                FROM
                    "venues_company" V0
                WHERE
                    UPPER(V0."name"::TEXT) LIKE UPPER(% pr %)
            )
        )
    

    This is how the filter should look like

    class CompanyVenueListFilter(filters.FilterSet):
        text = filters.CharFilter(method="name_search")
    
        def name_search(self, qs, name, value):
            first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
            company_qs = Company.objects.filter(name__icontains=value).annotate(
                first_venue_id=Subquery(first_venue.values("id")[:1])
            )
            return qs.filter(
                Q(name__icontains=value)
                | Q(id__in=company_qs.values_list("first_venue_id", flat=True))
            )
    
        class Meta:
            model = Venue
            fields = [
                "name",
                "company__name",
            ]
    

    Update for Django 3.2.16

    Seems like the query above will not work for such version because it generated a query without parentheses in WHERE clause around V0."id", chunk of query looks like

    WHERE
        U0."company_id" IN V0."id"
    

    and it makes PostgreSQL complain with error

    ERROR: syntax error at or near "V0"
    LINE 17: U0."company_id" IN V0."id" 
    

    For Django==3.2.16 the filtering method in CompanyVenueListFilter could look like following:

        def name_search(self, qs, name, value):
            company_qs = Company.objects.filter(name__icontains=value)
            venues_qs = (
                Venue.objects.filter(company__in=company_qs)
                .order_by("company_id", "id")
                .distinct("company_id")
            )
            return qs.filter(Q(name__icontains=value) | Q(id__in=venues_qs.values_list("id")))
    

    The answer is based on other stackoverflow answer and django docs