I'm working on sitemap.xml generation for a Django + Wagtail project.
I implemented XML generation for articles by overriding the `get_sitemap_urls` method. The problem is that the Wagtail sitemap generator doesn't "see" the blog tag URLs (it doesn't add them to the sitemap).
...
from taggit.models import TaggedItemBase
class BlogPageTag(TaggedItemBase):
    """Through model that attaches a taggit tag to a ``BlogInnerPage``."""

    # The page side of the relation: tag links are exposed on the page as
    # `tagged_items` and are removed together with the page (CASCADE).
    content_object = ParentalKey(
        'BlogInnerPage',
        on_delete=models.CASCADE,
        related_name='tagged_items',
    )
class BlogInnerPage(Page):
    """Blog article page with an icon, a post date and a set of tags."""

    icon = models.ForeignKey(
        'wagtailimages.Image',
        null=True,
        blank=False,
        on_delete=models.SET_NULL,
        related_name='+'
    )
    ...  # further fields omitted in the original snippet
    post_date = models.DateTimeField(auto_now_add=True, null=True)
    tags = ClusterTaggableManager(through=BlogPageTag, blank=False)

    @property
    def sidebar_tags(self):
        """Return tag usage across all live blog pages.

        Returns:
            A list of ``(slug, {'name': ..., 'count': ...})`` tuples sorted
            by slug, where ``count`` is the number of live pages using the tag.
        """
        tags = {}
        for post in BlogInnerPage.objects.live():
            for tag in post.tags.all():
                # Create the entry on first sight, then count every occurrence.
                entry = tags.setdefault(tag.slug, {'name': tag.name, 'count': 0})
                entry['count'] += 1
        return sorted(tags.items())

    ...  # further members omitted in the original snippet

    def get_sitemap_urls(self, request=None):
        """Return the sitemap entry for this page.

        Args:
            request: Optional current request; the sitemap generator passes
                it as of Wagtail 2.2, so the parameter must be accepted.

        Returns:
            A single-element list holding a dict with the ``location``,
            ``lastmod``, ``changefreq`` and ``priority`` of this page.
        """
        return [
            {
                'location': self.get_full_url(request),
                'lastmod': self.last_published_at,
                'changefreq': 'weekly',
                'priority': .8
            }
        ]
I'm expecting to see the following result for tags:
<url>
<loc>https://example.com/?tag=design</loc>
<lastmod>2019-01-31T12:24:01+00:00</lastmod>
<priority>0.80</priority>
</url>
Here is what I've got for blog articles:
<url>
<loc>
http://example.com/trends-booming-todays-it-industry/
</loc>
<lastmod>2018-10-04</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
You're on the right track with `get_sitemap_urls`. The default implementation only returns an entry for the page itself because it can't possibly know about all the query parameters you might be filtering on. Therefore, it's up to you to add those entries to the list.
Assuming you have two page classes, `HomePage` and `BlogInnerPage`, you'll want to leave the `BlogInnerPage` implementation as you have it and update `HomePage` to return its own sitemap entry plus one entry per tag.
from urllib.parse import urlencode
from django.db.models import Max
from taggit.models import Tag
from wagtail.core.models import Page
class HomePage(Page):
    """Home page whose sitemap output also lists one URL per blog tag."""

    # Note that the method signature should accept an optional request parameter as of Wagtail 2.2
    def get_sitemap_urls(self, request=None):
        """Return this page's sitemap entries plus a ``?tag=<slug>`` entry per used tag.

        Args:
            request: Optional current request, forwarded to the parent
                implementation and to ``get_full_url``.

        Returns:
            The list from the parent implementation, extended with one dict
            (``location``, ``lastmod``) for every tag referenced by a
            ``BlogPageTag`` row.
        """
        urls = super().get_sitemap_urls(request)

        # Get the page's URL, we will use that later.
        base_url = self.get_full_url(request)

        # Get the IDs of all the tags used on your `BlogPage`s.
        # NOTE(review): `BlogPageTag` must be importable in this module.
        tag_ids = BlogPageTag.objects.values_list('tag_id', flat=True).distinct()

        # 1. Filter all the tags with the IDs fetched above.
        # 2. Annotate the query with the latest `last_published_at` of the associated pages.
        #    Note the `home_` part in the string, this needs to be replaced by the name of
        #    the Django app your `BlogPageTag` model lives in.
        # 3. Only fetch the slug and lastmod (we don't need the full object).
        tags = (
            Tag.objects
            .filter(pk__in=tag_ids)
            .annotate(lastmod=Max('home_blogpagetag_items__content_object__last_published_at'))
            .values('slug', 'lastmod')
        )

        # Add sitemap entries for each tag. `.values()` yields dicts, so the
        # columns are read by key rather than by attribute.
        for tag in tags:
            urls.append({
                'location': '{}?{}'.format(base_url, urlencode({'tag': tag['slug']})),
                'lastmod': tag['lastmod'],
            })

        # Return the results.
        return urls