Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bitnami/redis]: Enhance sentinel resiliency, harmonize sentinel behaviour by using staticID as default behaviour #7278

Merged
merged 1 commit into from
Aug 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bitnami/redis/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ name: redis
sources:
- https://github.com/bitnami/bitnami-docker-redis
- http://redis.io/
version: 14.8.11
version: 15.0.0
2 changes: 0 additions & 2 deletions bitnami/redis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,7 @@ The command removes all the Kubernetes components associated with the chart and
| `sentinel.quorum` | Sentinel Quorum | `2` |
| `sentinel.downAfterMilliseconds` | Timeout for detecting a Redis™ node is down | `60000` |
| `sentinel.failoverTimeout` | Timeout for performing an election failover | `18000` |
| `sentinel.cleanDelaySeconds` | Delay seconds when cleaning nodes IPs | `5` |
| `sentinel.parallelSyncs` | Number of replicas that can be reconfigured in parallel to use the new master after a failover | `1` |
| `sentinel.staticID` | Enable static Sentinel IDs for each replica | `false` |
| `sentinel.configuration` | Configuration for Redis™ Sentinel nodes | `""` |
| `sentinel.command` | Override default container command (useful when using custom images) | `[]` |
| `sentinel.args` | Override default container args (useful when using custom images) | `[]` |
Expand Down
158 changes: 67 additions & 91 deletions bitnami/redis/templates/scripts-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ data:
myip=$(echo $myip | awk '{if ( match($0,/([0-9]+\.)([0-9]+\.)([0-9]+\.)[0-9]+/) ) { print substr($0,RSTART,RLENGTH); } }')
fi

HEADLESS_SERVICE="{{ template "common.names.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
REDIS_SERVICE="{{ template "common.names.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"

not_exists_dns_entry() {
if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep "^${myip}" )" ]]; then
warn "$HEADLESS_SERVICE does not contain the IP of this pod: ${myip}"
Expand All @@ -35,14 +38,14 @@ data:
return 0
}

HEADLESS_SERVICE="{{ template "common.names.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
REDIS_SERVICE="{{ template "common.names.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"

# Waits for DNS to add this ip to the service DNS entry
retry_while not_exists_dns_entry

export REDIS_REPLICATION_MODE="slave"
[[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^${myip}")" ]] && export REDIS_REPLICATION_MODE="master"
# Asks Sentinel, through the Redis service address, for the address of the
# current master of the monitored master set.
# Globals:
#   REDIS_TLS_ENABLED, REDIS_TLS_CERT_FILE, REDIS_TLS_KEY_FILE,
#   REDIS_TLS_CA_FILE, REDIS_SERVICE, REDIS_PASSWORD (read)
#   sentinel_info_command (written)
# Outputs:
#   The redis-cli reply collapsed onto a single line; callers split it into
#   an array and read element 0 as the host and element 1 as the port.
get_sentinel_master_info() {
    local tls_flags=""

    # TLS options are only added when TLS is enabled for this container.
    if is_boolean_yes "$REDIS_TLS_ENABLED"; then
        tls_flags=" --tls --cert ${REDIS_TLS_CERT_FILE} --key ${REDIS_TLS_KEY_FILE} --cacert ${REDIS_TLS_CA_FILE}"
    fi

    sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }}${tls_flags} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"

    # Deliberately unquoted: the command string has to be word-split into
    # arguments, and the multi-line reply is flattened by echo.
    echo $($sentinel_info_command)
}

{{- if and .Values.replica.containerSecurityContext.runAsUser (eq (.Values.replica.containerSecurityContext.runAsUser | int) 0) }}
useradd redis
Expand All @@ -52,6 +55,21 @@ data:
[[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")"
[[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")"

# Waits for DNS to add this ip to the service DNS entry
retry_while not_exists_dns_entry

if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^${myip}")" ]]; then
# Only node available on the network, master by default
export REDIS_REPLICATION_MODE="master"
else
export REDIS_REPLICATION_MODE="slave"

# Fetches current master's host and port
REDIS_SENTINEL_INFO=($(get_sentinel_master_info))
REDIS_MASTER_HOST=${REDIS_SENTINEL_INFO[0]}
REDIS_MASTER_PORT_NUMBER=${REDIS_SENTINEL_INFO[1]}
fi

if [[ "$REDIS_REPLICATION_MODE" = "master" ]]; then
debug "Starting as master node"
if [[ ! -f /opt/bitnami/redis/etc/master.conf ]]; then
Expand All @@ -62,30 +80,6 @@ data:
if [[ ! -f /opt/bitnami/redis/etc/replica.conf ]];then
cp /opt/bitnami/redis/mounted-etc/replica.conf /opt/bitnami/redis/etc/replica.conf
fi
if is_boolean_yes "$REDIS_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }} --tls --cert ${REDIS_TLS_CERT_FILE} --key ${REDIS_TLS_KEY_FILE} --cacert ${REDIS_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi
REDIS_SENTINEL_INFO=($($sentinel_info_command))
REDIS_MASTER_HOST=${REDIS_SENTINEL_INFO[0]}
REDIS_MASTER_PORT_NUMBER=${REDIS_SENTINEL_INFO[1]}

# Immediately attempt to connect to the reported master. If it doesn't exist the connection attempt will either hang
# or fail with "port unreachable" and give no data. The liveness check will then timeout waiting for the redis
# container to be ready and restart it. By then the new master will likely have been elected
if is_boolean_yes "$REDIS_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.service.sentinelPort }} --tls --cert ${REDIS_TLS_CERT_FILE} --key ${REDIS_TLS_KEY_FILE} --cacert ${REDIS_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.service.sentinelPort }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi

if [[ ! ($($sentinel_info_command)) ]]; then
# master doesn't actually exist, this probably means the remaining pods haven't elected a new one yet
# and are reporting the old one still. Once this happens the container will get stuck and never see the new
# master. We stop here to allow the container to not pass the liveness check and be restarted.
exit 1
fi
fi

if [[ ! -f /opt/bitnami/redis/etc/redis.conf ]];then
Expand Down Expand Up @@ -152,6 +146,9 @@ data:
myip=$(echo $myip | awk '{if ( match($0,/([0-9]+\.)([0-9]+\.)([0-9]+\.)[0-9]+/) ) { print substr($0,RSTART,RLENGTH); } }')
fi

HEADLESS_SERVICE="{{ template "common.names.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
REDIS_SERVICE="{{ template "common.names.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"

sentinel_conf_set() {
local -r key="${1:?missing key}"
local value="${2:-}"
Expand All @@ -167,6 +164,10 @@ data:
# Appends a directive to the Sentinel configuration file.
# Arguments:
#   $@ - words of the directive; echo joins them with single spaces
# A newline is glued in front of the first word, so the directive always
# starts on a fresh line even if the file does not end with one.
sentinel_conf_add() {
    local -r conf_file="/opt/bitnami/redis-sentinel/etc/sentinel.conf"

    echo $'\n'"$@" >> "$conf_file"
}
# Removes from the Sentinel configuration every line that starts with
# "<key>-", and drops empty lines as well.
# Arguments:
#   $1 - directive prefix, e.g. "sentinel known" removes both the
#        "sentinel known-replica" and "sentinel known-sentinel" lines.
#        It is used as a sed basic regular expression and must not contain "/".
#   $2 - configuration file to rewrite (optional, defaults to
#        /opt/bitnami/redis-sentinel/etc/sentinel.conf)
# Returns:
#   0 on success, non-zero if the file could not be filtered or replaced.
sentinel_conf_remove() {
    local -r key="${1:?missing key}"
    local -r conf_file="${2:-/opt/bitnami/redis-sentinel/etc/sentinel.conf}"

    # The expression must be double-quoted so the key is expanded by the shell;
    # inside single quotes sed only ever looked for the literal text "$1-".
    # The file is replaced only when sed succeeded, so a failure cannot leave
    # a truncated configuration behind.
    sed -e "/^${key}-/d" -e '/^$/d' "$conf_file" > "${conf_file}.tmp" \
        && mv "${conf_file}.tmp" "$conf_file"
}
# Derives an identifier from a name: the SHA-1 digest, in hex, of the name
# followed by a newline.
# Arguments:
#   $1 - name to hash
# Outputs:
#   The second field of the `openssl sha1` report, i.e. the hex digest
host_id() {
    local -r name="$1"

    echo "$name" \
        | openssl sha1 \
        | awk '{ print $2 }'
}
Expand All @@ -178,9 +179,14 @@ data:
debug "$HEADLESS_SERVICE has my IP: ${myip}"
return 0
}

HEADLESS_SERVICE="{{ template "common.names.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
REDIS_SERVICE="{{ template "common.names.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
# Asks Sentinel, through the Redis service address, for the address of the
# current master of the monitored master set.
# Globals:
#   REDIS_SENTINEL_TLS_ENABLED, REDIS_SENTINEL_TLS_CERT_FILE,
#   REDIS_SENTINEL_TLS_KEY_FILE, REDIS_SENTINEL_TLS_CA_FILE,
#   REDIS_SERVICE, REDIS_PASSWORD (read)
#   sentinel_info_command (written)
# Outputs:
#   The redis-cli reply collapsed onto a single line; callers split it into
#   an array and read element 0 as the host and element 1 as the port.
get_sentinel_master_info() {
    # This script uses the REDIS_SENTINEL_TLS_* settings for every other
    # Sentinel connection it makes, so this query has to use them as well
    # rather than the REDIS_TLS_* family.
    if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
        sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
    else
        sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
    fi

    # Deliberately unquoted: the command string has to be word-split into
    # arguments, and the multi-line reply is flattened by echo.
    echo $($sentinel_info_command)
}

# Waits for DNS to add this ip to the service DNS entry
retry_while not_exists_dns_entry
Expand All @@ -195,83 +201,53 @@ data:
printf "\nrequirepass %s" "$REDIS_PASSWORD" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- end }}
{{- end }}
{{- if .Values.sentinel.staticID }}
printf "\nsentinel myid %s" "$(host_id "$HOSTNAME")" >> /opt/bitnami/redis-sentinel/etc/sentinel.conf
{{- end }}
fi

export REDIS_REPLICATION_MODE="slave"

if [[ -z "$(getent ahosts "$HEADLESS_SERVICE" | grep -v "^${myip}")" ]]; then
# Only node available on the network, master by default
export REDIS_REPLICATION_MODE="master"
fi

# check master node firstly and quit as soon as possible when master is not ready.
if [[ "$REDIS_REPLICATION_MODE" = "master" ]]; then
REDIS_MASTER_HOST=${myip}
REDIS_MASTER_PORT_NUMBER="{{ .Values.master.containerPort }}"
else
if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_SERVICE -p {{ .Values.sentinel.service.sentinelPort }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi
REDIS_SENTINEL_INFO=($($sentinel_info_command))
export REDIS_REPLICATION_MODE="slave"

# Fetches current master's host and port
REDIS_SENTINEL_INFO=($(get_sentinel_master_info))
REDIS_MASTER_HOST=${REDIS_SENTINEL_INFO[0]}
REDIS_MASTER_PORT_NUMBER=${REDIS_SENTINEL_INFO[1]}

# Immediately attempt to connect to the reported master. If it doesn't exist the connection attempt will either hang
# or fail with "port unreachable" and give no data. The liveness check will then timeout waiting for the sentinel
# container to be ready and restart it. By then the new master will likely have been elected
if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.service.sentinelPort }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
else
sentinel_info_command="redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $REDIS_MASTER_HOST -p {{ .Values.sentinel.service.sentinelPort }} sentinel get-master-addr-by-name {{ .Values.sentinel.masterSet }}"
fi

if [[ ! ($($sentinel_info_command)) ]]; then
# master doesn't actually exist, this probably means the remaining pods haven't elected a new one yet
# and are reporting the old one still. Once this happens the container will get stuck and never see the new
# master. We stop here to allow the container to not pass the liveness check and be restarted.
exit 1
fi
fi

# Clean sentinels from the current sentinel nodes after the failover has completed.
for node in $( getent ahosts "$HEADLESS_SERVICE" | grep -v "^${myip}" | cut -f 1 -d ' ' | uniq ); do
info "Cleaning sentinels in sentinel node: $node"
if is_boolean_yes "$REDIS_SENTINEL_TLS_ENABLED"; then
redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $node -p {{ .Values.sentinel.containerPort }} --tls --cert ${REDIS_SENTINEL_TLS_CERT_FILE} --key ${REDIS_SENTINEL_TLS_KEY_FILE} --cacert ${REDIS_SENTINEL_TLS_CA_FILE} sentinel reset "*"
else
redis-cli {{- if .Values.auth.enabled }} -a $REDIS_PASSWORD {{- end }} -h $node -p {{ .Values.sentinel.containerPort }} sentinel reset "*"
fi
sleep {{ .Values.sentinel.cleanDelaySeconds }}
done
info "Sentinels clean up done"

sentinel_conf_set "sentinel monitor" "{{ .Values.sentinel.masterSet }} "$REDIS_MASTER_HOST" "$REDIS_MASTER_PORT_NUMBER" {{ .Values.sentinel.quorum }}"

add_replica() {
if [[ "$1" != "$REDIS_MASTER_HOST" ]]; then
sentinel_conf_add "sentinel known-replica {{ .Values.sentinel.masterSet }} $1 {{ .Values.sentinel.containerPort }}"
# Registers another pod as a known Sentinel of the monitored master set.
# The Sentinel ID written is the one host_id derives from the pod's hostname.
# Arguments:
#   $1 - hostname of the pod
#   $2 - IP address of the pod
# Nothing is added when either argument is empty or when the hostname is this
# pod's own ($HOSTNAME).
add_known_sentinel() {
    # Kept local so the caller's own "hostname" and "ip" variables are not
    # overwritten by this function.
    local -r hostname="${1:-}"
    local -r ip="${2:-}"

    if [[ -n "$hostname" && -n "$ip" && "$hostname" != "$HOSTNAME" ]]; then
        sentinel_conf_add "sentinel known-sentinel {{ .Values.sentinel.masterSet }} $ip {{ .Values.sentinel.containerPort }} $(host_id "$hostname")"
    fi
}
# Registers a node as a known replica of the monitored master set.
# Arguments:
#   $1 - IP address of the node
# Globals:
#   REDIS_MASTER_HOST (read)
# Nothing is added when the IP is empty or equals the current master's host.
add_known_replica() {
    # Kept local so the caller's own "ip" variable is not overwritten.
    local -r ip="${1:-}"

    if [[ -n "$ip" && "$ip" != "$REDIS_MASTER_HOST" ]]; then
        sentinel_conf_add "sentinel known-replica {{ .Values.sentinel.masterSet }} $ip {{ .Values.replica.containerPort }}"
    fi
}

{{- if .Values.sentinel.staticID }}
# remove generated known sentinels and replicas
tmp="$(sed -e '/^sentinel known-/d' -e '/^$/d' /opt/bitnami/redis-sentinel/etc/sentinel.conf)"
echo "$tmp" > /opt/bitnami/redis-sentinel/etc/sentinel.conf
# removes generated known sentinels and replicas
sentinel_conf_remove "sentinel known"

# Add available hosts on the network as known replicas & sentinels
for node in $(seq 0 {{ .Values.replica.replicaCount }}); do
NAME="{{ template "common.names.fullname" . }}-node-$node"
IP="$(getent hosts "$NAME.$HEADLESS_SERVICE" | awk ' {print $1 }')"
if [[ "$NAME" != "$HOSTNAME" && -n "$IP" ]]; then
sentinel_conf_add "sentinel known-sentinel {{ .Values.sentinel.masterSet }} $IP {{ .Values.sentinel.containerPort }} $(host_id "$NAME")"
add_replica "$IP"
fi
hostname="{{ template "common.names.fullname" . }}-node-$node"
ip="$(getent hosts "$hostname.$HEADLESS_SERVICE" | awk '{ print $1 }')"

add_known_sentinel "$hostname" "$ip"
add_known_replica "$ip"
done
add_replica "${myip}"
{{- end }}

{{- if .Values.tls.enabled }}
ARGS=("--port" "0")
Expand Down
7 changes: 0 additions & 7 deletions bitnami/redis/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -805,19 +805,12 @@ sentinel:
## Sentinel timing restrictions
## @param sentinel.downAfterMilliseconds Timeout for detecting a Redis&trade; node is down
## @param sentinel.failoverTimeout Timeout for performing an election failover
## @param sentinel.cleanDelaySeconds Delay seconds when cleaning nodes IPs
##
downAfterMilliseconds: 60000
failoverTimeout: 18000
cleanDelaySeconds: 5
## @param sentinel.parallelSyncs Number of replicas that can be reconfigured in parallel to use the new master after a failover
##
parallelSyncs: 1
## @param sentinel.staticID Enable static Sentinel IDs for each replica
## If disabled each sentinel will generate a random id at startup
## If enabled, each replica will have a constant ID on each start-up
##
staticID: false
## @param sentinel.configuration Configuration for Redis&trade; Sentinel nodes
## ref: https://redis.io/topics/sentinel
##
Expand Down