Set up a shared Varnish server

apache-2.2 varnish virtualhost

Having set up and configured Varnish Cache to handle caching for a single site, I'm looking into expanding the service to cover multiple sites running across multiple webservers, each in turn running multiple vHosts using NameVirtualHost. Here's a rough diagram of what I'm wanting to do.

shared Varnish infrastructure

Can anyone suggest how I can do this, and how to make sure that Varnish routes each request to the correct backend? I've had issues trying to have Varnish handle multiple backends based on the incoming URL; I think I set the VCL up wrong, which led to all manner of exciting problems.

Be aware that I don't want to use Varnish to load balance between servers. That might come later, or I might stick a load balancer in front of a cluster of Varnish instances.

Solved

Here's the VCL I ended up using. It's a bit long and laborious and if anyone has suggestions for ways to optimise it, I'd be appreciative.

# Varnish - Shared caching cluster
# 
# Live backend: vcl_recv selects it for every request whose Host header
# does not match the dev-site pattern.
backend live {
    .host = "sharedserver1.example.com";
    .port = "80";
}

# Staging backend: vcl_recv selects it for dev-site hosts and sends those
# requests straight to the backend with return (pass).
backend staging {
    .host = "sharedserver2.example.com";
    .port = "80";
}


# Client addresses that the PURGE and BAN handlers in vcl_recv compare
# client.ip against; only the local machine is listed.
acl purge {
    "localhost";
    "127.0.0.1";
}

sub vcl_recv {
    # Request entry point: pick the backend from the Host header, handle
    # cache invalidation (PURGE/BAN), normalise the request so equivalent
    # requests share one cache entry, then decide between lookup and pass.

    # Dev sites go to the staging backend and always bypass the cache.
    # The dots are escaped so that only a literal ".dev.example.com" matches.
    if (req.http.host ~ "\.dev\.example\.com") {
        set req.backend = staging;
        return (pass);
    }

    # Everything else is served from the live backend. There is deliberately
    # no return here: returning at this point would skip the ACL checks and
    # the normalisation below.
    set req.backend = live;

    # While the backend is healthy tolerate only slightly stale objects;
    # when it is sick, keep serving stale content for up to 24 hours.
    # (The object must also be kept that long: see beresp.grace in vcl_fetch.)
    if (req.backend.healthy) {
        set req.grace = 30s;
    }
    else {
        set req.grace = 24h;
    }

    # Discard any client-supplied X-Forwarded-For and record the real peer.
    remove req.http.X-Forwarded-For;
    set req.http.X-Forwarded-For = client.ip;

    # Single-object PURGE: only for clients in the purge ACL. The actual
    # purge happens in vcl_hit / vcl_miss after the lookup.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed";
        }
        return (lookup);
    }

    # Nuclear option: BAN. Same ACL check as PURGE.
    if (req.request == "BAN") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }

        # Ban any cached object whose URL matches req.url as a regex.
        ban("req.url ~ " + req.url);
        # Ban any cached object whose URL equals req.url.
        ban("req.url == " + req.url);
        # Ban objects matching both this hostname and this URL. The space
        # before "&&" is required: ban expressions are whitespace-tokenised.
        ban("req.http.host == " + req.http.host + " && req.url == " + req.url);

        # Answer with a synthetic page so the request never reaches the backend.
        error 200 "Ban added";
    }

    # Only GET and HEAD can be answered from cache. Anything else (form
    # posts etc.) must reach the backend with its method and body intact.
    if (req.request != "GET" && req.request != "HEAD") {
        return (pass);
    }

    # Collapse Accept-Encoding to a single value so each object is cached
    # at most once per supported encoding.
    if (req.http.Accept-Encoding) {
        if (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        }
        else if (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        }
        else {
            # Unknown encoding in header, remove it.
            unset req.http.Accept-Encoding;
        }
    }

    # Ignore client requests for fresh content.
    # The Cookie header is intentionally NOT removed here: doing so would
    # turn the cookie filtering below, the Cookie hashing in vcl_hash and
    # the session check in vcl_fetch into no-ops.
    unset req.http.Cache-Control;
    unset req.http.Max-Age;
    unset req.http.Pragma;

    # Strip the fragment, the server doesn't need it.
    if (req.url ~ "\#") {
        set req.url = regsub(req.url, "\#.*$", "");
    }

    # Strip Google-related query parameters, then tidy up a leftover
    # "?&" or a trailing "?".
    if (req.url ~ "(\?|&)(utm_source|utm_medium|utm_campaign|gclid|cx|ie|cof|siteurl)=") {
        set req.url = regsuball(req.url, "&(utm_source|utm_medium|utm_campaign|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "");
        set req.url = regsuball(req.url, "\?(utm_source|utm_medium|utm_campaign|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "?");
        set req.url = regsub(req.url, "\?&", "?");
        set req.url = regsub(req.url, "\?$", "");
    }

    # Static files never need cookies: strip them and serve from cache.
    if (req.url ~ "\.(jpg|jpeg|gif|png|ico|css|zip|tgz|gz|rar|bz2|pdf|txt|tar|wav|bmp|rtf|js|flv|swf|html|htm)(\?[a-z0-9]+)?$") {
        unset req.http.Cookie;
        return (lookup);
    }

    if (req.http.Cookie) {
        # Remove has_js and Google Analytics __* cookies.
        set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js)=[^;]*", "");

        # Remove a ";" prefix, if present.
        set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");

        # Remove the header entirely if nothing is left.
        if (req.http.Cookie ~ "^\s*$") {
            unset req.http.Cookie;
        }
    }

    # Requests that still carry credentials or cookies are user-specific:
    # send them to the backend. Everything else is looked up in the cache.
    # Returning explicitly (instead of falling through to the built-in
    # vcl_recv) avoids client.ip being appended to X-Forwarded-For twice.
    if (req.http.Authorization || req.http.Cookie) {
        return (pass);
    }
    return (lookup);
}

sub vcl_hit {
    # Cache hit. Anything other than PURGE is delivered as usual; a PURGE
    # drops the object that was found and reports success to the caller.
    if (req.request != "PURGE") {
        return (deliver);
    }
    purge;
    error 200 "Purged!";
}

sub vcl_hash {
    # When a Cookie header is present, add its value to the hash input.
    if (req.http.Cookie) {
        hash_data(req.http.Cookie);
    }
}

sub vcl_fetch {
    # Runs after a response has been fetched from the backend: decides
    # whether and for how long the object may be cached, and tags it with
    # debugging / ban-helper headers.

    # Never cache 404s.
    if (beresp.status == 404) {
        set beresp.ttl = 0s;
    }

    # Strip cookies for static files. Same pattern as in vcl_recv,
    # including the optional query string, so both sides agree.
    if (req.url ~ "\.(jpg|jpeg|gif|png|ico|css|zip|tgz|gz|rar|bz2|pdf|txt|tar|wav|bmp|rtf|js|flv|swf|html|htm)(\?[a-z0-9]+)?$") {
        unset beresp.http.set-cookie;
    }

    if (!beresp.ttl > 0s) {
        # Varnish determined the object was not cacheable.
        set beresp.http.X-Cacheable = "NO:Not Cacheable";
    } elsif (req.http.Cookie ~ "(UserID|_session)") {
        # Don't cache content for logged-in users.
        set beresp.http.X-Cacheable = "NO:Got Session";
        return (hit_for_pass);
    } elsif (beresp.http.Set-Cookie) {
        # A response that sets a cookie is user-specific. Because this
        # subroutine ends with an explicit return (deliver), the built-in
        # Set-Cookie check never runs, so it has to be done here; otherwise
        # one user's cookie would be cached and handed to everyone else.
        set beresp.http.X-Cacheable = "NO:Set-Cookie";
        return (hit_for_pass);
    } elsif (beresp.http.Cache-Control ~ "private") {
        # Respect Cache-Control: private from the backend.
        set beresp.http.X-Cacheable = "NO:Cache-Control=private";
        return (hit_for_pass);
    } elsif (beresp.ttl < 1s) {
        # Extend the lifetime of very short-lived objects artificially.
        # (Grace is set once for all cacheable objects further down.)
        set beresp.ttl = 300s;
        set beresp.http.X-Cacheable = "YES:Forced";
    } else {
        # Varnish determined the object was cacheable.
        set beresp.http.X-Cacheable = "YES";
    }

    # Record URL and host on the object; vcl_deliver strips the internal
    # headers again before the response leaves Varnish.
    set beresp.http.x-url = req.url;
    set beresp.http.x-host = req.http.host;

    # Keep expired objects around for as long as vcl_recv may want to serve
    # them stale (req.grace is 24h when the backend is sick). With a shorter
    # beresp.grace the object is gone before that window can ever be used.
    set beresp.grace = 24h;

    return (deliver);
}

sub vcl_deliver {
    # Last stop before the response goes to the client: strip headers that
    # reveal the caching layer or the backend software.
    unset resp.http.X-Varnish;
    unset resp.http.Via;
    unset resp.http.Age;

    # Internal bookkeeping headers added in vcl_fetch. x-url is removed
    # together with x-host so neither one leaks to clients.
    unset resp.http.x-host;
    unset resp.http.x-url;
    unset resp.http.X-Cacheable;

    # Hide X-Powered-By: nobody has to know we run PHP or which version.
    unset resp.http.X-Powered-By;
}

sub vcl_miss {
    # Cache miss. With a sick backend there is nothing to fetch, so fail
    # straight away; otherwise answer PURGE requests for objects that are
    # not in the cache and let every other request continue normally.
    if (req.backend.healthy) {
        if (req.request == "PURGE") {
            purge;
            error 404 "Not In Cache";
        }
    } else {
        return (error);
    }
}

sub vcl_pass {
    # A PURGE that ends up in pass mode cannot purge anything. Report that
    # explicitly; misses are handled separately in vcl_miss, so the message
    # refers to a passed object, not a missed one.
    if (req.request == "PURGE") {
        error 502 "PURGE on a passed object";
    }
}

sub vcl_error {
    # Replace the error page with a custom one for every status except
    # 200, 403, 404, 301 and 302; those fall through to the built-in
    # vcl_error.
    if (obj.status != 200 && obj.status != 403 && obj.status != 404 && obj.status != 301 && obj.status != 302) {
        # Returning from here skips the built-in handler, which is what
        # normally sets the Content-Type, so declare the body as HTML here.
        set obj.http.Content-Type = "text/html; charset=utf-8";
        synthetic{"
<!doctype html>
<html>
<body><h1>it's dead,dave</h1></body>
</html>
"};
        return (deliver);
    }
}

Best Answer

So, in the VCL file, first define the backend under a name of your choosing, like this:

 # Example backend definition; "lorem" is the name this backend is declared under.
 backend lorem {
  .host = "10.0.0.1";
  .port = "8088";
 }

Then add this to the vcl_recv section:

 # Route requests whose Host header matches loren.com to the "lorem"
 # backend defined above. The name used here must be the name of a declared
 # backend, otherwise the VCL will not compile.
 # return(pass) sends the request straight to the backend without caching.
 if (req.http.host ~ "loren.com") {
      set req.backend = lorem;
      return(pass);
 }

This will send all the traffic for loren.com (matched on the hostname) to the defined backend.

Define different back-ends for the different sites and it will work like a charm.

Related Topic