Search code examples
python django postgresql tags full-text-search

django full text search taggit


My application - the basics

I have a simple Django application which allows for storing information about certain items, and I'm trying to implement a search view/functionality.

I'm using django-taggit to tag the items by their functionality/features.

What I want to implement

I want to implement a full-text search that searches across all the fields of the items, including their tags.

The problem(s)

  1. On the results view, the tagged items are showing up multiple times (one occurrence per tag).
  2. The ranking is correct when I specify *only a single* tag in the search field, but when I specify multiple tag names, I get unexpected ranking results.

I suspect the SearchVector() does not resolve the tags relation as I expected it to do. The tags should be treated just like a list of words in this case.

Example Code

models.py

from django.db import models
from taggit.managers import TaggableManager

class Item(models.Model):
    """A stored item: a unique slug identifier, descriptive text fields, tags and an active flag."""

    # Unique slug; not editable through forms.
    identifier = models.SlugField(verbose_name='ID', editable=False, unique=True)

    # Optional free-text attributes (all may be left blank).
    short_text = models.CharField(verbose_name='Short Text', blank=True, max_length=100)
    serial_number = models.CharField(verbose_name='Serial Number', blank=True, max_length=30)
    revision = models.CharField(verbose_name='Revision/Version', blank=True, max_length=30)
    part_number = models.CharField(verbose_name='Part Number', blank=True, max_length=30)
    manufacturer = models.CharField(verbose_name='Manufacturer', blank=True, max_length=30)
    description = models.TextField(verbose_name='Description', blank=True)

    # django-taggit manager; an item may have no tags.
    tags = TaggableManager(verbose_name='Tags', blank=True)

    # Items are active by default.
    is_active = models.BooleanField(verbose_name='Active', default=True)

forms.py

from django import forms

class SearchForm(forms.Form):
    """Search form: an optional search string plus an 'active items only' toggle."""

    # Optional search text, capped at 200 characters.
    search = forms.CharField(
        required=False,
        max_length=200,
    )
    # Optional checkbox; initially ticked when the form is rendered unbound.
    active_only = forms.BooleanField(
        label='Show active items only',
        required=False,
        initial=True,
    )

views.py

from django.views.generic.list import ListView
from django.contrib.postgres.search import SearchQuery, SearchVector, SearchRank

from . import models
from . import forms

class ItemListView(ListView):
    """List ``Item`` objects, optionally ordered by full-text search rank.

    The search parameters are read from the request's GET data through
    ``SearchForm``.
    """

    form_class = forms.SearchForm
    model = models.Item
    # NOTE(review): nothing in this class reads ``fields``; presumably it is
    # left over or consumed elsewhere -- confirm before relying on it.
    fields = ['serial_number', 'part_number', 'manufacturer', 'tags', 'is_active']
    template_name_suffix = '_list'

    def get_context_data(self, **kwargs):
        """Add the search form, bound to the GET parameters, to the context."""
        context = super().get_context_data(**kwargs)
        context['form'] = self.form_class(self.request.GET)
        return context

    def get_queryset(self):
        """Return the items to list, ranked by the search text when one is given.

        With a valid form and a non-empty ``search`` value, the queryset is
        annotated with ``search`` (the search vector) and ``rank`` and ordered
        by descending rank. No filter on the search query is applied (the
        ``.filter(...)`` calls below are commented out), so non-matching items
        are still returned.
        """
        queryset = super().get_queryset()
        form = self.form_class(self.request.GET)
        if form.is_valid():
            if form.cleaned_data['active_only']:
                queryset = queryset.filter(is_active=True)

            if not form.cleaned_data['search']:
                # NOTE(review): this returns a fresh base queryset, so the
                # ``active_only`` filter applied just above is discarded
                # whenever the search text is empty.
                return super().get_queryset()

            # NOTE(review): 'tags' is a relation, not a text column. Including
            # it here joins the tag rows into the query, which is what makes a
            # tagged item show up once per tag in the results.
            search_vector = SearchVector('identifier', 'short_text', 'serial_number', 'revision', 'part_number',
                                         'manufacturer', 'description', 'tags')
            # 'websearch' selects web-search-style parsing of the user's input.
            search_query = SearchQuery(form.cleaned_data['search'], search_type='websearch')
            return (
                queryset.annotate(
                    search=search_vector, rank=SearchRank(search_vector, search_query)
                )
                # .filter(search=search_query)
                .order_by("-rank").distinct()
            ) #.filter(search__icontains=form.cleaned_data['search'],)
        # Invalid form: fall back to the unfiltered base queryset.
        return super().get_queryset()

Solution

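  • Your problem is that you're adding the tags field directly to your SearchVector. Because tags is a relation, this joins the tag rows into the query, so each item is returned once per tag and each of those rows is ranked on its own.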

    Let's concatenate the tags with Django's StringAgg into a single string and then use that string in your SearchVector.

    First, import StringAgg:

    from django.contrib.postgres.aggregates import StringAgg
    

    Then change your get_queryset method as follows:

    def get_queryset(self):
        """Return the items to list, ranked by the search text when one is given.

        The form is bound to the request's GET data. An invalid form yields
        the base queryset; an empty search yields the (optionally
        active-only) queryset without any ranking annotations.
        """
        items = super().get_queryset()
        form = self.form_class(self.request.GET)

        # Guard: nothing usable was submitted.
        if not form.is_valid():
            return items

        cleaned = form.cleaned_data
        if cleaned['active_only']:
            items = items.filter(is_active=True)

        term = cleaned['search']
        if not term:
            return items

        # Collapse each item's tag names into one space-separated string so
        # the tags take part in the search vector as plain text.
        items = items.annotate(tags_str=StringAgg('tags__name', delimiter=' '))

        vector = SearchVector(
            'identifier',
            'short_text',
            'serial_number',
            'revision',
            'part_number',
            'manufacturer',
            'description',
            'tags_str',
        )
        query = SearchQuery(term, search_type='websearch')

        ranked = items.annotate(search=vector, rank=SearchRank(vector, query))
        return ranked.order_by("-rank").distinct()