Search code examples
regexvarnishvarnish-vcl

How to add trailing slash with varnish?


I want to add a trailing slash to all URLs with Varnish (via a 301 redirect).

I was surprised that I couldn't find anything about this online, however.

This was the closest I got, but it is obviously broken because it doesn't account for query strings or for anything with a . in it.

# Hand every URL that does not end in "/" to the redirect handler.
# The synth status must be the one vcl_synth tests for (750); with any
# other value the redirect branch there never runs.
# Known limitation: query strings and file names containing a "." are
# not special-cased here.
if (req.url !~ "/$") {
  return (synth (750, ""));
}

...

# Converts the internal marker status 750 into a permanent redirect
# to the www host, keeping the requested URL as-is.
sub vcl_synth {
  if (resp.status == 750) {
    # Target: fixed scheme and host, followed by the original request URL.
    set resp.http.Location = "http://www.example.com" + req.url;
    # Replace the internal marker with the real HTTP status.
    set resp.status = 301;
    return (deliver);
  }
}

Test cases I want to account for

example.com/xyz?query=string -> www.example.com/xyz/?query=string (add www, add /)

example.com/api/latest.json -> www.example.com/api/latest.json (add www, don't add /)


Solution

  • Here is a solution for Varnish 3 that you could translate into Varnish 4. I don't have Varnish 4 handy myself:

    # Canonicalise requests with a 301 redirect:
    #   * the host must start with "www."
    #   * a path whose last segment contains no "." must end in "/"
    # The query string, if any, is carried over unchanged.
    sub vcl_recv {
        # The trailing-slash test only looks at the part of the URL before
        # the first "?", so "/" or "." characters inside the query string
        # cannot influence the decision:
        #   ^[^?]*      the path up to its last segment
        #   /[^./?]+    a non-empty last segment without "." or trailing "/"
        #   (\?|$)      followed by the query string or the end of the URL
        # Requests without a Host header are left alone because no usable
        # redirect target can be built for them.
        if (req.http.Host && (
               req.http.Host !~ "(?i)^www\." ||
               req.url ~ "^[^?]*/[^./?]+(\?|$)")
        ) {
            error 720 "Moved Permanently";
        }
    }

    # Builds the redirect response for the internal status 720 raised in
    # vcl_recv. Other statuses fall through untouched.
    sub vcl_error {
        if (obj.status == 720) {
            set obj.status = 301;
            set obj.response = "Moved Permanently";

            # regsub() returns its input unchanged when the pattern does not
            # match, so the same expression covers both "insert a slash
            # before the query string / at the end" and "keep the URL as is"
            # (the latter happens when only the www. prefix was missing).
            if (req.http.Host ~ "(?i)^www\.") {
                set obj.http.Location = "http://" + req.http.Host +
                  regsub(req.url, "^([^?]*/[^./?]+)(\?|$)", "\1/\2");
            } else {
                // add the missing www. prefix
                set obj.http.Location = "http://www." + req.http.Host +
                  regsub(req.url, "^([^?]*/[^./?]+)(\?|$)", "\1/\2");
            }

            # Deliver here so the built-in vcl_error does not append its
            # HTML error page and Retry-After header to the redirect.
            return (deliver);
        }
    }
    

    I have a Varnish test case file that exercises the various URLs you are interested in as well:

    varnishtest "Testing adding trailing slash"

    # Backend stub: serves the four requests that are expected to pass
    # through with a 200 (c1, c5, c9, c11). All other clients expect a 301.
    server s1 {
        rxreq
        txresp -body "hello world"
    } -repeat 4 -start

    # Varnish instance under test, loading the VCL from the working directory.
    varnish v1 -vcl+backend {
        include "${pwd}/26921577.vcl";
    } -start

    # --- Plain path without a "." in the last segment ---------------------

    # Canonical already: www host and trailing slash.
    client c1 {
        txreq -hdr "Host: www.example.com" -url "/document/"
        rxresp
        expect resp.status == 200
        expect resp.body == "hello world"
    } -run

    # Missing trailing slash.
    client c2 {
        txreq -hdr "Host: www.example.com" -url "/document"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/document/"
    } -run

    # Missing www. prefix.
    client c3 {
        txreq -hdr "Host: example.com" -url "/document/"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/document/"
    } -run

    # Missing both.
    client c4 {
        txreq -hdr "Host: example.com" -url "/document"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/document/"
    } -run

    # --- Path followed by a query string ----------------------------------

    # Canonical already.
    client c5 {
        txreq -hdr "Host: www.example.com" -url "/xyz/?query=string"
        rxresp
        expect resp.status == 200
        expect resp.body == "hello world"
    } -run

    # Slash has to be inserted in front of the query string.
    client c6 {
        txreq -hdr "Host: www.example.com" -url "/xyz?query=string"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/xyz/?query=string"
    } -run

    # Missing www. prefix only.
    client c7 {
        txreq -hdr "Host: example.com" -url "/xyz/?query=string"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/xyz/?query=string"
    } -run

    # Missing www. prefix and slash.
    client c8 {
        txreq -hdr "Host: example.com" -url "/xyz?query=string"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/xyz/?query=string"
    } -run

    # --- Last path segment contains a "." : never add a slash -------------

    # Canonical already.
    client c9 {
        txreq -hdr "Host: www.example.com" -url "/api/latest.json"
        rxresp
        expect resp.status == 200
        expect resp.body == "hello world"
    } -run

    # Only the www. prefix is added.
    client c10 {
        txreq -hdr "Host: example.com" -url "/api/latest.json"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/api/latest.json"
    } -run

    # Canonical already, with a query string.
    client c11 {
        txreq -hdr "Host: www.example.com" -url "/api/latest.json?query=string"
        rxresp
        expect resp.status == 200
        expect resp.body == "hello world"
    } -run

    # Only the www. prefix is added; the query string is preserved.
    client c12 {
        txreq -hdr "Host: example.com" -url "/api/latest.json?query=string"
        rxresp
        expect resp.status == 301
        expect resp.http.Location == "http://www.example.com/api/latest.json?query=string"
    } -run

    # One client request per client block above.
    varnish v1 -expect client_req == 12