Pivert's Blog

How to use parameter substitution to shorten keepalived.conf


Reading Time: 4 minutes

If you use keepalived for your HA services, and especially if you use the virtual_server options, you can make great use of the substitution capabilities in the config file.

I’ll just post 2 equivalent configurations, both working with keepalived 2.2 (which comes by default on Ubuntu 22.04), so you can judge for yourself. The configuration with substitutions is 3 times shorter than the long configuration and, IMHO, much easier to read and to manage.

I decided to post this config because I found the manual not so easy to follow, and had tried unsuccessfully to avoid repetition with virtual_server_group.

The 2 configs below only show the virtual_server part, and both produce the same configuration. Of course, you can use the same technique for vrrp_instance.
By reading the example and checking the manual for SEQ, LST and variables, you should be able to simplify your configuration easily.

The variables containing RS refer to real servers, and the _ID variables hold the last octet of the IP address. It might be easier to follow with a look at the result at the end.

Short version

# Options shared by the virtual_server blocks in this file. Despite the "RS"
# in the name, this definition is expanded directly inside virtual_server { },
# not inside real_server { }.
# Multi-line definition: every line except the last ends with a backslash.
$DEFAULT_RS_OPTIONS= \
delay_loop 20 \
lb_algo rr \
lb_kind NAT \
persistence_timeout 180 \
protocol TCP

# Templates
# $RS_TPL: one real_server entry with a TCP connect check.
# ${BASE_RS} (address prefix), ${RS_ID} (last octet) and ${PORT} are not set
# here; they take whatever value is in effect where the template is expanded.
$RS_TPL= \
real_server ${BASE_RS}${RS_ID} ${PORT} { \
    weight 1 \
    TCP_CHECK { \
        connect_timeout 3 \
    } \
}

# $VS_TPL: one virtual_server on $VS:${PORT} carrying the shared options,
# plus one $RS_TPL real_server for each RS_ID from $MIN_RS to $MAX_RS
# (both ends included).
$VS_TPL= \
virtual_server $VS ${PORT} { \
    $DEFAULT_RS_OPTIONS \
    ~SEQ(RS_ID, $MIN_RS, $MAX_RS) $RS_TPL \
}
    
# Config
# The templates use the values these variables hold at the point of each
# ~LST line, so the variables can be redefined between expansions.
# Real servers 10.10.66.31 to 10.10.66.33:
$BASE_RS=10.10.66.
$MIN_RS=31
$MAX_RS=33

# One virtual_server per listed port on 10.10.178.11.
$VS=10.10.178.11
~LST(PORT, 80, 443, 2222) $VS_TPL

# 8082 Traefik
# 9323 Docker Prometheus metrics
# Same real servers as above, exposed on a different virtual IP.
$VS=10.10.232.1
~LST(PORT, 8082, 9323) $VS_TPL

# 9221 Proxmox Exporter
# $VS is still 10.10.232.1 here; only the real-server range changes
# (10.10.178.2 to 10.10.178.4).
$BASE_RS=10.10.178.
$MIN_RS=2
$MAX_RS=4
~LST(PORT, 9221) $VS_TPL

# Prometheus metrics from Ceph
# Dedicated real_server template: it uses a MISC_CHECK script instead of the
# TCP_CHECK in $RS_TPL, with address prefix and port hard-coded.
# ${_PWD} is presumably the directory of the config file (the long version of
# this config uses /etc/keepalived) - confirm against the keepalived manual.
$CEPH_RS_TPL= \
real_server 10.10.178.${RS_ID} 9283 { \
    weight 1 \
    MISC_CHECK { \
        misc_path "${_PWD}/scripts/head_long.py http://10.10.178.${RS_ID}:9283/metrics" \
        misc_timeout 5 \
    } \
}
# Written out directly (no $VS_TPL) because the real_server template differs.
virtual_server 10.10.232.1 9283 {
    $DEFAULT_RS_OPTIONS
    ~SEQ(RS_ID, 2, 4) $CEPH_RS_TPL
}

# Galera cluster (MariaDB)
# Each real server gets two checks: a TCP connect check on 3306 and a
# MISC_CHECK script that queries the metrics endpoint on port 9104.
# The MISC_CHECK script is configured with "user dyngandi".
$GALERA_RS= \
real_server 10.10.10.${RS_ID} 3306 { \
    weight 1 \
    TCP_CHECK { \
        connect_timeout 3 \
    } \
    MISC_CHECK { \
        misc_path "${_PWD}/scripts/keepalived_http_check.py http://10.10.10.${RS_ID}:9104/metrics" \
        misc_timeout 5 \
        user dyngandi \
    } \
}

# Real servers 10.10.10.51 to 10.10.10.53.
virtual_server 10.10.232.1 3306 {
    $DEFAULT_RS_OPTIONS
    # NOTE(review): delay_loop is already set to 20 by $DEFAULT_RS_OPTIONS;
    # this second value is presumably meant to override it - confirm how
    # keepalived treats the repeated keyword.
    delay_loop 15
    ~SEQ(RS_ID,51,53) $GALERA_RS
}

Long version

# 10.10.178.11:80 -> 10.10.66.31-33:80, TCP connect check on each real server.
virtual_server 10.10.178.11 80 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180
    protocol TCP
    real_server 10.10.66.31 80 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.32 80 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.33 80 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
}

# 10.10.178.11:443 -> 10.10.66.31-33:443, TCP connect check on each real server.
virtual_server 10.10.178.11 443 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180
    protocol TCP
    real_server 10.10.66.31 443 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.32 443 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.33 443 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
}

# 10.10.178.11:2222 -> 10.10.66.31-33:2222, TCP connect check on each real server.
virtual_server 10.10.178.11 2222 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180
    protocol TCP
    real_server 10.10.66.31 2222 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.32 2222 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.33 2222 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
}

# 10.10.232.1:8082 -> 10.10.66.31-33:8082, TCP connect check on each real server.
virtual_server 10.10.232.1 8082 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180
    protocol TCP
    real_server 10.10.66.31 8082 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.32 8082 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.33 8082 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
}

# 10.10.232.1:9323 -> 10.10.66.31-33:9323, TCP connect check on each real server.
virtual_server 10.10.232.1 9323 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180
    protocol TCP
    real_server 10.10.66.31 9323 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.32 9323 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.66.33 9323 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
}

# 10.10.232.1:9221 -> 10.10.178.2-4:9221, TCP connect check on each real server.
virtual_server 10.10.232.1 9221 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180 
    protocol TCP
    real_server 10.10.178.2 9221 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.178.3 9221 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
    real_server 10.10.178.4 9221 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
    }
}

# Prometheus metrics from Ceph
# 10.10.232.1:9283 -> 10.10.178.2-4:9283. Each real server is checked with the
# head_long.py script run against its own /metrics URL (MISC_CHECK), not with
# a TCP_CHECK.
virtual_server 10.10.232.1 9283 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180 
    protocol TCP
    real_server 10.10.178.2 9283 {
        weight 1
        MISC_CHECK {
            misc_path "/etc/keepalived/scripts/head_long.py http://10.10.178.2:9283/metrics"
            misc_timeout 5
        }
    }
    real_server 10.10.178.3 9283 {
        weight 1
        MISC_CHECK {
            misc_path "/etc/keepalived/scripts/head_long.py http://10.10.178.3:9283/metrics"
            misc_timeout 5
        }
    }
    real_server 10.10.178.4 9283 {
        weight 1
        MISC_CHECK {
            misc_path "/etc/keepalived/scripts/head_long.py http://10.10.178.4:9283/metrics"
            misc_timeout 5
        }
    }
}

# Galera cluster (MariaDB)
# 10.10.232.1:3306 -> 10.10.10.51-53:3306.
# Literal expansion of the short version's Galera section: each real server
# has a TCP connect check plus a MISC_CHECK script (configured with
# "user dyngandi") that queries the metrics endpoint on port 9104.
virtual_server 10.10.232.1 3306 {
    delay_loop 20
    lb_algo rr
    lb_kind NAT
    persistence_timeout 180
    protocol TCP
    # The short version sets delay_loop again after the shared defaults;
    # the repeated keyword is kept here so both versions stay line-for-line
    # equivalent.
    delay_loop 15
    real_server 10.10.10.51 3306 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
        MISC_CHECK {
            misc_path "/etc/keepalived/scripts/keepalived_http_check.py http://10.10.10.51:9104/metrics"
            misc_timeout 5
            user dyngandi
        }
    }
    real_server 10.10.10.52 3306 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
        MISC_CHECK {
            misc_path "/etc/keepalived/scripts/keepalived_http_check.py http://10.10.10.52:9104/metrics"
            misc_timeout 5
            user dyngandi
        }
    }
    real_server 10.10.10.53 3306 {
        weight 1
        TCP_CHECK {
            connect_timeout 3
        }
        MISC_CHECK {
            misc_path "/etc/keepalived/scripts/keepalived_http_check.py http://10.10.10.53:9104/metrics"
            misc_timeout 5
            user dyngandi
        }
    }
}

The verbose version is OK, but as the configuration gets longer, and if you have a lot of virtual servers (VS) and real servers (RS), I find it much easier to work with the short version.

A few commands to help with configuration and debugging

  • journalctl -u keepalived.service  -f
  • journalctl -u keepalived.service  -f | grep Keepalived_healthcheckers
  • keepalived -t && echo ConfOK
  • ipvsadm -l (see the example below)
# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port         Forward Weight ActiveConn InActConn
TCP  10.10.178.11:80 rr persistent 180
  -> 10.10.66.32:80             Masq    1      1          0         
TCP  10.10.178.11:443 rr persistent 180
  -> 10.10.66.32:443            Masq    1      2          0         
TCP  10.10.178.11:2222 rr persistent 180
  -> 10.10.66.31:2222           Masq    1      0          1         
  -> 10.10.66.32:2222           Masq    1      0          0         
  -> 10.10.66.33:2222           Masq    1      0          0         
TCP  10.10.232.1:3306 rr persistent 180
  -> 10.10.10.51:3306           Masq    1      0          0         
  -> 10.10.10.52:3306           Masq    1      0          11        
  -> 10.10.10.53:3306           Masq    1      0          67        
TCP  10.10.232.1:8082 rr persistent 180
  -> 10.10.66.31:8082           Masq    1      0          0         
  -> 10.10.66.32:8082           Masq    1      0          0         
  -> 10.10.66.33:8082           Masq    1      1          0         
TCP  10.10.232.1:9221 rr persistent 180
  -> 10.10.178.2:9221           Masq    1      0          8         
  -> 10.10.178.3:9221           Masq    1      0          0         
  -> 10.10.178.4:9221           Masq    1      0          0         
TCP  10.10.232.1:9283 rr persistent 180
  -> 10.10.178.2:9283           Masq    1      1          8         
TCP  10.10.232.1:9323 rr persistent 180
  -> 10.10.66.31:9323           Masq    1      1          0         
  -> 10.10.66.32:9323           Masq    1      0          0         
  -> 10.10.66.33:9323           Masq    1      0          0     

Like it ?

Get notified on new posts (max 1 / month)
Soyez informés lors des prochains articles

Leave a Reply

Your email address will not be published. Required fields are marked *