Search code examples
cachingvarnishvarnish-vcl

varnish mixing up user agent in requests


I have just installed varnish and i am using http://github.com/varnish/varnish-devicedetect to detect the device and serve different content to users. It works well but sometimes i end up with a user agent different than the device! for example: when i browse my site from an android device, i sometimes get a user agent of pc!

I have tried going over my vcl looking for mistakes but i couldn't figure it out.

One more thing:

I am behind a router and there are other users on the network opening the site(possibly same page). We only have one public ip and everybody is assigned a local ip. That wasn't the case it was randomly picking a different user agent not just of local devices.

How does varnish differentiate two requests going to the same page from the same ip?

EDIT: my vcl code

import throttle;
import redis;

include "devicedetect.vcl"; #https://raw.githubusercontent.com/varnish/varnish-devicedetect/master/devicedetect.vcl

backend default {
   .host = "127.0.0.1";
   .port = "8080";
}

acl admin_ip {
   "XXX.XXX.XXX.XXX";
}

sub vcl_init {

# By default, the redis module will attempt to connect to a Redis server
# at 127.0.0.1:6379 with a connect timeout of 200 milliseconds.

# The function redis.init_redis(host, port, timeout_ms) may be used to
# connect to an alternate Redis server or use a different connect timeout.

   redis.init_redis("localhost", 6379, 200);  /* default values */
}

sub vcl_recv {
   call devicedetect;
   if (req.request == "PURGE") {
        if (!client.ip ~ admin_ip) {
            error 405 "You can't do this, muggle!";
        }
            return(lookup);
   }  
   if(throttle.is_allowed("ip:" + client.ip, "2000req/10m") > 0s) {
                    redis.send("SELECT 5");
                    redis.send("SADD blacklist " + client.ip);
                    error 429 "Too many requests";
   }
  }

  # Use anonymous, cached pages if all backends are down.
  if (!req.backend.healthy) {
    unset req.http.Cookie;
  }

  set req.http.X-Forwarded-For = client.ip;

  if (
      req.url ~ "^/admin$" ||
      req.url ~ "^/admin/.*$" ||
      req.url ~ "^/blog/wp-admin/" ||
      req.url ~ "^/blog/wp-login.php" ||
      req.url ~ "^/blog/wp-admin/.*$") {
       return (pass);
  }  

  if (req.url ~ "(?i)\.(pdf|asc|dat|txt|doc|xls|ppt|tgz|csv|png|gif|jpeg|jpg|ico|swf|css|js)(\?.*)?$") {
    unset req.http.Cookie;
    unset req.http.Cache-Control;
    unset req.http.Max-Age;
    unset req.http.Pragma;
    unset req.http.Cookie;
  }
  if(req.http.Cookie) {
     if (req.http.Cookie !~ "(sessionid|XXXid)" ) {
       remove req.http.Cookie;
     }
  }

}

sub vcl_fetch {
   set beresp.http.X-UA-Device = req.http.X-UA-Device;
   #unset beresp.http.expires; # for cloudfront since it prefers cache-control
                              # header over expires

   if (req.url ~ "(?i)\.(pdf|asc|dat|txt|doc|xls|ppt|tgz|csv|png|gif|jpeg|jpg|ico|swf|css|js)(\?.*)?$") {
    unset beresp.http.set-cookie;
   }
   if (req.http.Content-Type ~ "(image|audio|video|pdf|flash)") {
        set beresp.do_gzip = false;
   }
   if (req.http.Content-Type ~ "text") {
        set beresp.do_gzip = true;
   }
  # Varnish determined the object was not cacheable
    if (beresp.ttl <= 0s) {
        set beresp.http.X-Cacheable = "NO:Not Cacheable";

    # You don't wish to cache content for logged in users
    } elsif (req.http.Cookie ~ "(UserID|_session)") {
        set beresp.http.X-Cacheable = "NO:Got Session";
        return(hit_for_pass);

    # You are respecting the Cache-Control=private header from the backend
    } elsif (beresp.http.Cache-Control ~ "private") {
        set beresp.http.X-Cacheable = "NO:Cache-Control=private";
        return(hit_for_pass);

    # Varnish determined the object was cacheable
    } else {
        set beresp.http.X-Cacheable = "YES";
    }
  if (req.backend.healthy) {
        set req.grace = 10m;
  } else {
        set req.grace = 1h;
  }
  return (deliver);
}

sub vcl_fetch {
     if (beresp.ttl <= 0s ||
         beresp.http.Set-Cookie ||
         beresp.http.Vary == "*") {
               /*
                * Mark as "Hit-For-Pass" for the next 2 minutes
                */
               set beresp.ttl = 120 s;
               return (hit_for_pass);
     }
     set beresp.grace = 1h;
     return (deliver);
}

sub vcl_pass {
     return (pass);
}

sub vcl_hash {
     hash_data(req.url);
     if (req.http.host) {
         hash_data(req.http.host);
     } else {
         hash_data(server.ip);
     }
     return (hash);
}

sub vcl_hit {
     if (req.request == "PURGE"){
        set obj.ttl = 0s;
        error 200 "Varnish cache has been purged for this object.";
     }
     return (deliver);
}

sub vcl_miss {
     if (req.request == "PURGE") {
        error 404 "Object not in cache.";
     } 
     return (fetch);
}


sub vcl_deliver {
  #std.log("DEV: Hits on " + req.url + ": " + obj.hits);

  if (obj.hits > 0) {
    set resp.http.X-Varnish-Cache = "HIT";
  }
  else {
    set resp.http.X-Varnish-Cache = "MISS";
  }

  return (deliver);
}

sub vcl_error {
  # Redirect to some other URL in the case of a homepage failure.
  #if (req.url ~ "^/?$") {
  #  set obj.status = 302;
  #  set obj.http.Location = "http://backup.example.com/";
  #}

  # Otherwise redirect to the homepage, which will likely be in the cache.
  set obj.http.Content-Type = "text/html; charset=utf-8";
  synthetic {"
<html>
<head>
  <title>Error "} + obj.status + {" </title>
  <style>
    body { background: #FFF; text-align: center; color: black; }
    #page { border: 1px solid #CCC; width: 500px; margin: 100px auto 0; padding: 30px; background: f2f2f2; }
    .error { color: #222; }
  </style>
</head>
<body>
  <div id="page">
    <h1 class="title">Error "} + obj.status + " " + obj.response + {"</h1>
    <p>The page you requested may be temporarily unavailable.</p>
    <p>Message blah blah!</p>
  </div>
</body>
</html>
"};
  return (deliver);
}

Solution

  • You should start by checking out the installation documentation for varnish-devicedetect. Depending on how your backend is configured, you can use any of the examples listed in the documentation.

    The most simple case (example 1) is adding the X-UA-Device header to the Vary header, so that the different content served for different devices gets cached uniquely. Note that Varnish uses Vary independently from vcl_hash(), so you don't need to add Vary or X-UA-Device header to hash yourself (reference).

    Here is the example with comments from the documentation (link above) for reference.

    include "devicedetect.vcl";
    sub vcl_recv { call devicedetect; }
    # req.http.X-UA-Device is copied by Varnish into bereq.http.X-UA-Device
    
    # so, this is a bit conterintuitive. The backend creates content based on the normalized User-Agent,
    # but we use Vary on X-UA-Device so Varnish will use the same cached object for all U-As that map to
    # the same X-UA-Device.
    # If the backend does not mention in Vary that it has crafted special
    # content based on the User-Agent (==X-UA-Device), add it.
    # If your backend does set Vary: User-Agent, you may have to remove that here.
    sub vcl_fetch {
        if (req.http.X-UA-Device) {
            if (!beresp.http.Vary) { # no Vary at all
                set beresp.http.Vary = "X-UA-Device";
            } elseif (beresp.http.Vary !~ "X-UA-Device") { # add to existing Vary
                set beresp.http.Vary = beresp.http.Vary + ", X-UA-Device";
            }
        }
        # remove comment for testing, be careful to use this in prod
        # Google might be worried about crafted content
        # set beresp.http.X-UA-Device = req.http.X-UA-Device;
    }
    
    # to keep any caches in the wild from serving wrong content to client #2 behind them, we need to
    # transform the Vary on the way out.
    sub vcl_deliver {
        if ((req.http.X-UA-Device) && (resp.http.Vary)) {
            set resp.http.Vary = regsub(resp.http.Vary, "X-UA-Device", "User-Agent");
        }
    }