Lease update fails with HOOKS_CALLOUT_ERROR in HA with multithreading
name: Lease update fails with HOOKS_CALLOUT_ERROR in HA with multithreading
about: Lease update periodically fails with HOOKS_CALLOUT_ERROR in HA scenario with multithreading
Describe the bug When serving >100k customers (over 400k lease updates per day) in an HA scenario with multithreading enabled, a lease update fails approximately 10-20 times per day with the following behavior:
- Server A sends a lease update to server B (the situation is symmetrical for the primary and secondary servers)
- Server B logs an error during message processing:
ERROR [kea-dhcp4.callouts.139779861616384] HOOKS_CALLOUT_ERROR error returned by callout on hook $lease4_update registered by library with index 1 (callout address 0x7f2110e0dd70) (callout duration 0.114 ms)
- Server A logs that the lease update failed:
WARN [kea-dhcp4.ha-hooks.139799612172032] HA_LEASE_UPDATE_FAILED [hwtype=1 2c:95:7f:11:94:49], cid=[no info], tid=0x55f54aa8: lease update to zgvdhcpsrv01 (http://172.24.85.53:8081/) failed: ResourceBusy: IP address:10.180.96.67 could not be updated., error code 1
To Reproduce Steps to reproduce the behavior:
- Run Kea (DHCPv4 only) in an HA scenario with two load-balancing servers (primary and secondary) and multithreading enabled
- Start serving over 100k clients
- Monitor log for HOOKS_CALLOUT_ERROR and HA_LEASE_UPDATE_FAILED messages
Expected behavior Servers should not have any failed lease updates.
Environment:
- Kea version: 2.0.2
- OS: Ubuntu 20.04
- Lease backend: memfile
- Hook libraries: libdhcp_lease_cmds, libdhcp_stat_cmds, libdhcp_ha
Additional Information The config is below - server info, subnets and client classes have been anonymized or removed.
{
"Dhcp4": {
"authoritative": false,
"boot-file-name": "",
"calculate-tee-times": false,
"client-classes": [],
"control-socket": {
"socket-name": "/tmp/kea-dhcp4-ctrl.sock",
"socket-type": "unix"
},
"ddns-generated-prefix": "myhost",
"ddns-override-client-update": false,
"ddns-override-no-update": false,
"ddns-qualifying-suffix": "",
"ddns-replace-client-name": "never",
"ddns-send-updates": true,
"ddns-update-on-renew": false,
"ddns-use-conflict-resolution": true,
"decline-probation-period": 86400,
"dhcp-ddns": {
"enable-updates": false,
"max-queue-size": 1024,
"ncr-format": "JSON",
"ncr-protocol": "UDP",
"sender-ip": "0.0.0.0",
"sender-port": 0,
"server-ip": "127.0.0.1",
"server-port": 53001
},
"dhcp-queue-control": {
"capacity": 64,
"enable-queue": false,
"queue-type": "kea-ring4"
},
"dhcp4o6-port": 0,
"echo-client-id": true,
"expired-leases-processing": {
"flush-reclaimed-timer-wait-time": 15,
"hold-reclaimed-time": 150,
"max-reclaim-leases": 1000,
"max-reclaim-time": 500,
"reclaim-timer-wait-time": 10,
"unwarned-reclaim-cycles": 5
},
"hooks-libraries": [
{
"library": "/usr/lib/x86_64-linux-gnu/kea/hooks/libdhcp_lease_cmds.so",
"parameters": {}
},
{
"library": "/usr/lib/x86_64-linux-gnu/kea/hooks/libdhcp_stat_cmds.so"
},
{
"library": "/usr/lib/x86_64-linux-gnu/kea/hooks/libdhcp_ha.so",
"parameters": {
"high-availability": [
{
"delayed-updates-limit": 100,
"heartbeat-delay": 3000,
"max-ack-delay": 7000,
"max-response-delay": 10000,
"max-unacked-clients": 0,
"mode": "load-balancing",
"multi-threading": {
"enable-multi-threading": true,
"http-client-threads": 0,
"http-dedicated-listener": true,
"http-listener-threads": 0
},
"peers": [
{
"auto-failover": true,
"name": "X",
"role": "primary",
"url": "http://X.X.X.X:8081/"
},
{
"auto-failover": true,
"name": "Y",
"role": "secondary",
"url": "http://Y.Y.Y.Y:8081/"
}
],
"send-lease-updates": true,
"sync-leases": true,
"sync-page-limit": 10000,
"sync-timeout": 60000,
"this-server-name": "Y",
"wait-backup-ack": false
}
]
}
}
],
"host-reservation-identifiers": [
"hw-address",
"duid",
"circuit-id",
"client-id"
],
"hostname-char-replacement": "",
"hostname-char-set": "[^A-Za-z0-9.-]",
"interfaces-config": {
"dhcp-socket-type": "udp",
"interfaces": [
"eth1"
],
"re-detect": true
},
"ip-reservations-unique": true,
"lease-database": {
"lfc-interval": 3600,
"name": "/var/lib/kea/dhcp4.leases",
"persist": true,
"port": 0,
"type": "memfile"
},
"loggers": [
{
"debuglevel": 0,
"name": "kea-dhcp4",
"output_options": [
{
"output": "syslog:kea"
}
],
"severity": "warn"
}
],
"match-client-id": true,
"multi-threading": {
"enable-multi-threading": true,
"packet-queue-size": 28,
"thread-pool-size": 4
},
"next-server": "0.0.0.0",
"option-data": [],
"option-def": [
{
"array": true,
"code": 121,
"encapsulate": "",
"name": "classless-static-route",
"record-types": "uint8, uint8",
"space": "dhcp4",
"type": "record"
}
],
"parked-packet-limit": 256,
"rebind-timer": 37800,
"renew-timer": 21600,
"reservations-global": false,
"reservations-in-subnet": true,
"reservations-out-of-pool": false,
"sanity-checks": {
"lease-checks": "warn"
},
"server-hostname": "",
"server-tag": "",
"shared-networks": [],
"statistic-default-sample-age": 0,
"statistic-default-sample-count": 20,
"store-extended-info": false,
"subnet4": [],
"t1-percent": 0.5,
"t2-percent": 0.875,
"valid-lifetime": 43200
}
}
Contacting you Email/GitHub; telephone is available after initial contact