Search code examples
varnishvarnish-vcl

Varnish Xkey softpurge


I'm trying to manage a Xkey soft-purge on objects stored from different hosts, using Xkey as a sort of tag to purge all objects that match the Xkey tag disregarding the hashing. First of all, is it possible? Or is hashing in the loop in any case?

In vcl_recv I manually set a Xkey key with set req.http.xkey = req.url; Then during PURGE call I use set req.http.x-purges = xkey.purge(req.url); to find objects, here non objects are found.

Full VCL:

vcl 4.0;

import std;
import directors;
import purge;
import xkey;


backend server1 {
    .host = "ecommerce-node1-prod";
    .port = "80";
    .probe = {
        .url = "/admin";
        .timeout = 1s;
        .interval = 5s;
        .window = 5;
        .threshold = 3;
    }
}

backend server2 {
    .host = "ecommerce-node2-prod";
    .port = "80";
    .probe = {
       .url = "/admin";
       .timeout = 1s;
       .interval = 5s;
       .window = 5;
       .threshold = 3;
   }
}

sub vcl_init {
    new bar = directors.round_robin();
    bar.add_backend(server1);
    bar.add_backend(server2);
}

# ACL for purgers IP. (This needs to contain app server ips)
acl purgers {
    "127.0.0.1";
    "localhost";
    "::1";
    "ecommerce-node1-prod";
    "ecommerce-node2-prod";
}

sub vcl_recv {
    # Mitigate httpoxy application vulnerability, see: https://httpoxy.org/
    unset req.http.Proxy;
    set req.http.xkey = req.url;

    # Strip query strings only needed by browser javascript. Customize to used tags.
    if (req.url ~ "(\?|&)(pk_campaign|piwik_campaign|pk_kwd|piwik_kwd|pk_keyword|pixelId|kwid|kw|adid|chl|dv|nk|pa|camid|adgid|cx|ie|cof|siteurl|utm_[a-z]+|_ga|gclid)=") {
        # see rfc3986#section-2.3 "Unreserved Characters" for regex
        set req.url = regsuball(req.url, "(pk_campaign|piwik_campaign|pk_kwd|piwik_kwd|pk_keyword|pixelId|kwid|kw|adid|chl|dv|nk|pa|camid|adgid|cx|ie|cof|siteurl|utm_[a-z]+|_ga|gclid)=[A-Za-z0-9\-\_\.\~]+&?", "");
    }
    set req.url = regsub(req.url, "(\?|\?&|&)$", "");

    # Normalize query arguments
    set req.url = std.querysort(req.url);

    # Normalize Accept-Encoding header
    # straight from the manual: https://www.varnish-cache.org/docs/3.0/tutorial/vary.html
    if (req.http.Accept-Encoding) {
        if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg)$") {
            # No point in compressing these
            unset req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            # unkown algorithm
            unset req.http.Accept-Encoding;
        }
    }


    # Handle PURGE
    if (req.method == "PURGE") {
        if (!client.ip ~ purgers) {
            return (synth(405, "Method not allowed"));
        }


          #set req.http.n-gone = xkey.softpurge(req.http.xkey);
          set req.http.x-purges = xkey.purge(req.url);

          return (synth(200, "Invalidated "+ req.http.x-purges +" objects"));
    }

    if (req.method != "GET" &&
        req.method != "HEAD" &&
        req.method != "PUT" &&
        req.method != "POST" &&
        req.method != "TRACE" &&
        req.method != "OPTIONS" &&
        req.method != "PATCH" &&
        req.method != "DELETE") {
        /* Non-RFC2616 or CONNECT which is weird. */
        return (pipe);
    }

    # We only deal with GET and HEAD by default
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    #pass media to backend (already cached by CloudFront)
    if (req.url ~ "^\/(media|bundles|css|fonts|js|theme|thumbnail)(\/.*)?$") {
        return (pass);
    }

    # Don't cache Authenticate & Authorization
    if (req.http.Authenticate || req.http.Authorization) {
        return (pass);
    }

    # Always pass these paths directly to php without caching
    # Note: virtual URLs might bypass this rule (e.g. /en/checkout)
    if (req.url ~ "^/(checkout|account|admin|api)(/.*)?$") {
        return (pass);
    }

    return (hash);
}

sub vcl_hash {
       # Consider Shopware http cache cookies
    if (req.http.cookie ~ "sw-cache-hash=") {
        hash_data("+context=" + regsub(req.http.cookie, "^.*?sw-cache-hash=([^;]*);*.*$", "\1"));
    } elseif (req.http.cookie ~ "sw-currency=") {
        hash_data("+currency=" + regsub(req.http.cookie, "^.*?sw-currency=([^;]*);*.*$", "\1"));
    }

    #return(lookup);
}

sub vcl_hit {
  
  # Consider client states for response headers
  if (req.http.cookie ~ "sw-states=") {
     set req.http.states = regsub(req.http.cookie, "^.*?sw-states=([^;]*);*.*$", "\1");

     if (req.http.states ~ "logged-in" && obj.http.sw-invalidation-states ~ "logged-in" ) {
        return (pass);
     }

     if (req.http.states ~ "cart-filled" && obj.http.sw-invalidation-states ~ "cart-filled" ) {
        return (pass);
     }
  }



}

sub vcl_backend_response {
    # Fix Vary Header in some cases
    if (beresp.http.Vary ~ "User-Agent") {
        set beresp.http.Vary = regsub(beresp.http.Vary, ",? *User-Agent *", "");
        set beresp.http.Vary = regsub(beresp.http.Vary, "^, *", "");
        if (beresp.http.Vary == "") {
            unset beresp.http.Vary;
        }
    }

    # Respect the Cache-Control=private header from the backend
    if (
        beresp.http.Pragma        ~ "no-cache" ||
        beresp.http.Cache-Control ~ "no-cache" ||
        beresp.http.Cache-Control ~ "private"
    ) {
        set beresp.ttl = 0s;
        set beresp.http.X-Cacheable = "NO:Cache-Control=private";
        set beresp.uncacheable = true;
        return (deliver);
    }

    # strip the cookie before the image is inserted into cache.
    if (bereq.url ~ "\.(png|gif|jpg|swf|css|js|webp)$") {
        unset beresp.http.set-cookie;
    }

    # Allow items to be stale if needed.
    set beresp.grace = 60s;


    # Save the bereq.url so purges work efficiently
    set beresp.http.x-url = bereq.url;
    set beresp.http.X-Cacheable = "YES";

    # Remove the exact PHP Version from the response for more security
    unset beresp.http.x-powered-by;


    return (deliver);
}
   
sub vcl_deliver {
    ## we don't want the client to cache
    set resp.http.Cache-Control = "max-age=0, private";

    # remove link header, if session is already started to save client resources
    if (req.http.cookie ~ "session-") {
        unset resp.http.Link;
    }

    # Set a cache header to allow us to inspect the response headers during testing
    if (obj.hits > 0) {
        unset resp.http.set-cookie;
        set resp.http.X-Cache = "HIT";
    }  else {
        set resp.http.X-Cache = "MISS";
    }

    # Remove the exact PHP Version from the response for more security (e.g. 404 pages)
    unset resp.http.x-powered-by;

    # invalidation headers are only for internal use
    unset resp.http.sw-invalidation-states;

    set resp.http.X-Cache-Hits = obj.hits;
}

Request where xkey is added

*   << Request  >> 32796
-   Begin          req 32795 rxreq
-   Timestamp      Start: 1664372821.615382 0.000000 0.000000
-   Timestamp      Req: 1664372821.615382 0.000000 0.000000
-   VCL_use        boot
-   ReqStart       172.16.0.80 20244 a0
-   ReqMethod      GET
-   ReqURL         /it-it/prodotti/catene/catene-di-luci/
-   ReqProtocol    HTTP/1.1
-   ReqHeader      X-Forwarded-For: 84.247.245.84, 130.176.89.143
-   ReqHeader      X-Forwarded-Proto: https
-   ReqHeader      X-Forwarded-Port: 443
-   ReqHeader      Host: test-prod.luminalpark.com
-   ReqHeader      X-Amzn-Trace-Id: Root=1-63345055-76f876fd27d8613e28b7c9db
-   ReqHeader      User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53
-   ReqHeader      X-Amz-Cf-Id: JA7XrQuv5k-jhdhb3uKmlf88YjlJVvcDcw6udobH5vL-YkSMDT2flw==
-   ReqHeader      Via: 2.0 6c61cea6f371b1744d3b5315a0029062.cloudfront.net (CloudFront)
-   ReqHeader      Cookie: _dc_gtm_UA-830149-18=1; _ga=GA1.1.1015042543.1664369859; _ga_499EG6CXZD=GS1.1.1664371930.2.1.1664372814.0.0.0; _gcl_au=1.1.1616057317.1664369858; _gid=GA1.2.1284203691.1664369859; lp-platform=it-it; lp-state=IT; newUser=1; poll_manager=1; session
-   ReqHeader      Accept-Language: it,it-IT;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,fr;q=0.5
-   ReqHeader      Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
-   ReqHeader      Accept-Encoding: gzip, deflate, br
-   ReqHeader      cache-control: max-age=0
-   ReqHeader      sec-ch-ua: "Microsoft Edge";v="105", "Not)A;Brand";v="8", "Chromium";v="105"
-   ReqHeader      sec-ch-ua-mobile: ?0
-   ReqHeader      sec-ch-ua-platform: "Windows"
-   ReqHeader      dnt: 1
-   ReqHeader      upgrade-insecure-requests: 1
-   ReqHeader      sec-fetch-site: none
-   ReqHeader      sec-fetch-mode: navigate
-   ReqHeader      sec-fetch-user: ?1
-   ReqHeader      sec-fetch-dest: document
-   ReqHeader      CloudFront-Viewer-HTTP-Version: 2.0
-   ReqHeader      CloudFront-Forwarded-Proto: https
-   ReqHeader      CloudFront-Viewer-Address: 84.247.245.84:59146
-   ReqHeader      CloudFront-Viewer-TLS: TLSv1.3:TLS_AES_128_GCM_SHA256:connectionReused
-   ReqHeader      X-Cloudfront-Origin: VC5ZNQ588QNE3S
-   ReqUnset       X-Forwarded-For: 84.247.245.84, 130.176.89.143
-   ReqHeader      X-Forwarded-For: 84.247.245.84, 130.176.89.143, 172.16.0.80
-   ReqUnset       Via: 2.0 6c61cea6f371b1744d3b5315a0029062.cloudfront.net (CloudFront)
-   ReqHeader      Via: 2.0 6c61cea6f371b1744d3b5315a0029062.cloudfront.net (CloudFront), 1.1 ip-172-16-3-107 (Varnish/trunk)
-   VCL_call       RECV
-   ReqHeader      xkey: /it-it/prodotti/catene/catene-di-luci/
-   ReqURL         /it-it/prodotti/catene/catene-di-luci/
-   ReqURL         /it-it/prodotti/catene/catene-di-luci/
-   ReqUnset       Accept-Encoding: gzip, deflate, br
-   ReqHeader      Accept-Encoding: gzip
-   VCL_return     hash
-   VCL_call       HASH
-   VCL_return     lookup
-   Hit            32771 7190.679091 60.000000 0.000000
-   VCL_call       HIT
-   VCL_return     deliver
-   RespProtocol   HTTP/1.1
-   RespStatus     200
-   RespReason     OK
-   RespHeader     Server: nginx/1.18.0 (Ubuntu)
-   RespHeader     Content-Type: text/html; charset=UTF-8
-   RespHeader     Cache-Control: must-revalidate, public, s-maxage=7200
-   RespHeader     Date: Wed, 28 Sep 2022 13:46:52 GMT
-   RespHeader     Strict-Transport-Security: max-age=31536000; includeSubDomains
-   RespHeader     X-Frame-Options: deny
-   RespHeader     X-Content-Type-Options: nosniff
-   RespHeader     Referrer-Policy: strict-origin-when-cross-origin
-   RespHeader     sw-invalidation-states:
-   RespHeader     Set-Cookie: sw-states=deleted; expires=Tue, 28-Sep-2021 13:46:51 GMT; Max-Age=0; path=/; secure; httponly; samesite=lax
-   RespHeader     Set-Cookie: sw-cache-hash=deleted; expires=Tue, 28-Sep-2021 13:46:51 GMT; Max-Age=0; path=/; httponly
-   RespHeader     Content-Encoding: gzip
-   RespHeader     Vary: Accept-Encoding
-   RespHeader     x-url: /it-it/prodotti/catene/catene-di-luci/
-   RespHeader     X-Cacheable: YES
-   RespHeader     X-Varnish: 32796 32771
-   RespHeader     Age: 9
-   RespHeader     Via: 1.1 ip-172-16-3-107 (Varnish/trunk)
-   RespHeader     Accept-Ranges: bytes
-   VCL_call       DELIVER
-   RespUnset      Cache-Control: must-revalidate, public, s-maxage=7200
-   RespHeader     Cache-Control: max-age=0, private
-   RespUnset      Set-Cookie: sw-states=deleted; expires=Tue, 28-Sep-2021 13:46:51 GMT; Max-Age=0; path=/; secure; httponly; samesite=lax
-   RespUnset      Set-Cookie: sw-cache-hash=deleted; expires=Tue, 28-Sep-2021 13:46:51 GMT; Max-Age=0; path=/; httponly
-   RespHeader     X-Cache: HIT
-   RespUnset      sw-invalidation-states:
-   RespHeader     X-Cache-Hits: 1
-   VCL_return     deliver
-   Timestamp      Process: 1664372821.615457 0.000074 0.000074
-   Filters
-   RespHeader     Content-Length: 110820
-   RespHeader     Connection: keep-alive
-   Timestamp      Resp: 1664372821.615548 0.000166 0.000091
-   ReqAcct        1598 0 1598 619 110820 111439
-   End

Purge request log, showing no objects found:

*   << Request  >> 1277962
-   Begin          req 1277960 rxreq
-   Timestamp      Start: 1664372901.809064 0.000000 0.000000
-   Timestamp      Req: 1664372901.809064 0.000000 0.000000
-   VCL_use        boot
-   ReqStart       172.16.2.136 51214 a0
-   ReqMethod      PURGE
-   ReqURL         /it-it/prodotti/catene/catene-di-luci/
-   ReqProtocol    HTTP/1.1
-   ReqHeader      Host: 172.16.3.107
-   ReqHeader      User-Agent: GuzzleHttp/7
-   ReqHeader      X-Forwarded-For: 172.16.2.136
-   ReqHeader      Via: 1.1 ip-172-16-3-107 (Varnish/trunk)
-   VCL_call       RECV
-   ReqHeader      xkey: /it-it/prodotti/catene/catene-di-luci/
-   ReqURL         /it-it/prodotti/catene/catene-di-luci/
-   ReqURL         /it-it/prodotti/catene/catene-di-luci/
-   ReqHeader      x-purges: 0
-   VCL_return     synth
-   VCL_call       HASH
-   VCL_Log        hashing for BAN request
-   VCL_return     lookup
-   RespProtocol   HTTP/1.1
-   RespStatus     200
-   RespReason     Invalidated 0 objects
-   RespHeader     Date: Wed, 28 Sep 2022 13:48:21 GMT
-   RespHeader     Server: Varnish
-   RespHeader     X-Varnish: 1277962
-   VCL_call       SYNTH
-   RespHeader     Content-Type: text/html; charset=utf-8
-   RespHeader     Retry-After: 5
-   VCL_return     deliver
-   Timestamp      Process: 1664372901.809126 0.000061 0.000061
-   RespHeader     Content-Length: 287
-   Storage        malloc Transient
-   Filters
-   RespHeader     Connection: keep-alive
-   Timestamp      Resp: 1664372901.809155 0.000090 0.000029
-   ReqAcct        101 0 101 213 287 500
-   End

Solution

  • It's perfectly possible, but you need to add the header to beresp object (in vcl_backend_response) to have the objects properly tagged