Feb 12

Sitemaps for Django static pages

Django sitemaps is a great framework to generate sitemaps for your site. The only problem is that it's not quite ready for static pages, i.e. pages that you render with render_to_response because the flatpages app does not meet your needs.

How can we add them to our sitemap and provide a sound last modification date? Some approaches I saw on the net returned just datetime.datetime.now() but that's inaccurate. I ended up coding the following solution:

# Given the following urls.py
from foo.catalog.sitemaps import CategorySiteMap, ProductSiteMap
# The class is spelled StaticSitemap (lowercase "m"); the imported name must
# match both its definition and its use in the sitemaps dictionary below.
from foo.static.sitemaps import StaticSitemap

...

# Static pages. StaticSitemap only picks up patterns whose name starts with
# 'static_', and it reads the 'template' default argument to find the file
# whose modification time becomes the page's lastmod.
urlpatterns += patterns('foo.static.views',
    url(r'^about/$', 'about', {'template': 'static/about.html'}, name='static_about'),
    url(r'^faq/$', 'faq', {'template': 'static/faq.html'}, name='static_faq'),
    url(r'^mission/$', 'mission', {'template': 'static/mission.html'}, name='static_mission'),
    url(r'^terms/$', 'terms', {'template': 'static/terms.html'}, name='static_terms'),
)

# StaticSitemap is instantiated (unlike the other two entries) because its
# constructor needs the URL patterns to scan; it must therefore be created
# after the static patterns above have been added to urlpatterns.
sitemaps = {
    'categories': CategorySiteMap,
    'products': ProductSiteMap,
    'static': StaticSitemap(urlpatterns)
}

urlpatterns += patterns('django.contrib.sitemaps.views',
    (r'^sitemap\.xml$', 'sitemap', {'sitemaps': sitemaps}),
)

I created the following class that I include in my sitemaps dictionary:

import datetime
import os

from django.contrib import sitemaps
from django.core import urlresolvers
from django.conf import settings

class StaticSitemap(sitemaps.Sitemap):
    """Sitemap section for static pages rendered directly from a template.

    The URL patterns given to the constructor are scanned once; every named
    pattern whose name starts with ``static_`` becomes a sitemap entry.  The
    last-modification date of an entry is the mtime of the template file
    named by the pattern's ``template`` default argument, searched for in
    ``settings.TEMPLATE_DIRS``.
    """
    priority = 0.5

    def __init__(self, patterns):
        """Scan *patterns* (an iterable of URL pattern objects) for static pages."""
        self.patterns = patterns
        # Maps URL name -> datetime of the template's last change, or None
        # when that date could not be determined.
        self._items = {}
        self._initialize()

    def _initialize(self):
        """Fill ``self._items`` from the ``static_``-named URL patterns."""
        for p in self.patterns:
            name = getattr(p, 'name', None)
            if name is not None and name.startswith('static_'):
                self._items[name] = self._get_modification_date(p)

    def _get_modification_date(self, p):
        """Return the template mtime for pattern *p* as a datetime.

        Returns None when the pattern has no ``template`` default argument,
        the template is not found in any template directory, or the file
        cannot be stat'ed.  The page is then still listed, just without a
        last-modification date, instead of failing while the URLconf loads.
        """
        default_args = getattr(p, 'default_args', None) or {}
        template = default_args.get('template')
        if template is None:
            return None

        template_path = self._get_template_path(template)
        if template_path is None:
            return None

        try:
            mtime = os.stat(template_path).st_mtime
        except OSError:
            # The file vanished or became unreadable after the lookup.
            return None
        return datetime.datetime.fromtimestamp(mtime)

    def _get_template_path(self, template_path):
        """Return the first existing path for *template_path*, or None.

        The relative template name is joined onto each directory of
        ``settings.TEMPLATE_DIRS`` in order.
        """
        for template_dir in settings.TEMPLATE_DIRS:
            path = os.path.join(template_dir, template_path)
            if os.path.exists(path):
                return path

        return None

    def items(self):
        """Return the URL names of the static pages as a list."""
        # A real list (not a dict view) so the result supports len() and
        # slicing on Python 3 as well as Python 2.
        return list(self._items)

    def changefreq(self, obj):
        """Return the change frequency, the same for every static page."""
        return 'monthly'

    def lastmod(self, obj):
        """Return the recorded modification date for URL name *obj* (may be None)."""
        return self._items[obj]

    def location(self, obj):
        """Return the URL obtained by reversing the URL name *obj*."""
        return urlresolvers.reverse(obj)

Now my sitemap includes the static pages with an accurate last modified date.

UPDATE: Fixed typo, thanks Soviut