Clustercheck: Difference between revisions
(Created page with "= Custom clustercheck for Galera cluster = == Introduction == When using Galera as clustered MySQL database for...") |
(No difference)
|
Revision as of 12:32, 22 September 2017
Custom clustercheck for Galera cluster
Introduction
When using Galera as a clustered MySQL database for OX App Suite, a load balancer is required to translate Galera's notion of equivalent cluster nodes into OX's understanding of master and slave nodes (or writeUrl and readUrl). A typical choice is round-robin distribution for the readUrl and persistent active-passive behavior for the writeUrl. See the Galera setup page for more detailed information.
The load balancers need to be able to check the health status of the Galera cluster nodes in order to decide which nodes are available for read requests, and which node should be picked persistently for write requests. The latter point is most important when many distributed load balancers do not synchronize their target lists with each other, as is the case in one of our proposed high-level design options (distributed HAProxy instances on the OX App Suite groupware nodes). It seems natural to leverage the mechanism also used by MariaDB MaxScale to define a master node: use the one with wsrep_local_index=0.
Recent versions of the packages (both MariaDB and Percona) ship with a /usr/bin/clustercheck script which has basically been designed for that task. However, the original version has some shortcomings: most notably, it has been designed for and works only with HAProxy, but not with Keepalived, and it offers no support for the wsrep_local_index=0 feature discussed above. So, we decided to improve on that script.
Installation
Copy the script below to a location on your Galera nodes where it will not be overwritten. We assume /usr/local/bin/clustercheck.ox for that purpose.
The clustercheck.ox script
#!/bin/bash
#
# Script to make a proxy (ie HAProxy) capable of monitoring Percona XtraDB Cluster nodes properly
#
# Authors:
# Raghavendra Prabhu <raghavendra.prabhu@percona.com>
# Olaf van Zandwijk <olaf.vanzandwijk@nedap.com>
#
# Based on the original script from Unai Rodriguez and Olaf (https://github.com/olafz/percona-clustercheck)
#
# Heavily rewritten and extended by Dominik Epple <dominik.epple@open-xchange.com> 2017-09
#
# Grant privileges required:
# GRANT PROCESS ON *.* TO 'clustercheckuser'@'localhost' IDENTIFIED BY 'clustercheckpassword';
# ---- Default settings (each can be overridden by a command-line option) ----

# Where debug/error output is appended; /dev/null disables logging.
ERR_FILE=/dev/null

# Option files handed to the MySQL client; empty means "not used".
DEFAULTS_FILE=
DEFAULTS_EXTRA_FILE=

# Seconds the MySQL client may take to connect; protects against a hung mysqld.
TIMEOUT=10

# Availability policy switches (1 = enabled, 0 = disabled).
AVAILABLE_WHEN_DONOR=0      # report a donor node as available
AVAILABLE_WHEN_READONLY=1   # report a read-only node as available
MASTER_MODE=0               # only report the node with wsrep_local_index=0 as available
#######################################
# Print the command-line help text.
# Globals:   none
# Arguments: none
# Outputs:   writes the usage text to stdout ($0 is expanded to the script name)
#######################################
usage() {
cat <<EOF
usage:
$0 [-h]
show this usage text
$0 [-e error_file] [-f defaults_file] [-F defaults_extra_file] [-t timeout_secs] [-d] [-r] [-m] [user [pass]]
Perform clustercheck. Arguments are
-e error_file
File to log errors to. Default: /dev/null
-f defaults_file
Defaults file for MySQL client. Default: none
Preferred way to pass credentials to the MySQL client.
-F defaults_extra_file
Extra defaults file for MySQL client. Default: none
Kept for compatibility with the original clustercheck.
-t timeout
Timeout for the MySQL client in seconds. Default: 10
-d
Consider this node as available while being donor for a SST. Default: Donor node is considered unavailable.
-r
Consider this node as unavailable while being read-only. Default: read-only node is considered available.
-m
Consider this node as available only if it has got wsrep_local_index=0. Useful to define a "master" node. Default: It is sufficient to be "Synced" for a node to be considered available.
user, pass
Credentials used to connect to the MySQL server
EOF
}
#######################################
# Append a timestamped debug message to the error log.
# Logging is skipped entirely while the log target is /dev/null (the default).
# Globals:   ERR_FILE (read) - log file path; unset/empty is treated as /dev/null
# Arguments: $1 - message text
# Outputs:   one line "<ISO-8601 timestamp> <message>" appended to ERR_FILE
#######################################
log_debug() {
  local err_file=${ERR_FILE:-/dev/null}
  if [[ "$err_file" != "/dev/null" ]]; then
    # The printf builtin's %(...)T formats the current time (-1) without forking
    # `date`, which keeps the health check cheap. Timestamp and message go out
    # in a single append so lines from concurrent checks cannot interleave.
    printf '%(%FT%T%z)T %s\n' -1 "$1" >> "$err_file"
  fi
}
#######################################
# Send a minimal HTTP/1.0 response on stdout and terminate the script.
# Globals:   none (calls log_debug)
# Arguments: $1 - HTTP status line remainder, e.g. "200 OK"
#            $2 - response body text (sent verbatim, followed by CRLF)
#            $3 - exit status of the script
# Outputs:   status line, Content-Length header, blank line and body on stdout
# Returns:   never returns; exits with status $3
#######################################
output() {
  local http_status=$1
  local message=$2
  local exit_status=$3
  local length

  log_debug "sending \"$http_status\" \"$message\" to the client."

  # Count bytes rather than characters so Content-Length stays correct even
  # if the body ever contains multi-byte text. +2 accounts for the final CRLF.
  local LC_ALL=C
  length=$(( ${#message} + 2 ))

  # '%s' emits the body verbatim; `echo -e` would expand backslash escapes
  # in the message and make the body disagree with Content-Length.
  printf 'HTTP/1.0 %s\r\nContent-Length: %d\r\n\r\n%s\r\n' \
    "$http_status" "$length" "$message"

  # Close stdout so the client sees end-of-response. A bare `1<&-` without
  # `exec` does not affect the current shell, so it has to be `exec`.
  exec 1>&-
  exit "$exit_status"
}
#######################################
# Parse the command-line options into the global configuration variables.
# Globals:   ERR_FILE, AVAILABLE_WHEN_DONOR, AVAILABLE_WHEN_READONLY,
#            DEFAULTS_FILE, DEFAULTS_EXTRA_FILE, TIMEOUT, MASTER_MODE (written)
#            OPTIND (left pointing at the first non-option argument)
# Arguments: the script's command-line arguments
# Returns:   exits 0 after -h, exits 1 on an unknown or malformed option
#######################################
parse_options() {
  local opt
  # "F:" must be part of the option string, otherwise the documented
  # -F defaults_extra_file option is rejected as unknown.
  while getopts "e:drf:F:t:mh" opt; do
    case "${opt}" in
      e)
        ERR_FILE=${OPTARG}
        ;;
      d)
        AVAILABLE_WHEN_DONOR=1
        ;;
      r)
        AVAILABLE_WHEN_READONLY=0
        ;;
      f)
        DEFAULTS_FILE=${OPTARG}
        ;;
      F)
        DEFAULTS_EXTRA_FILE=${OPTARG}
        ;;
      t)
        TIMEOUT=${OPTARG}
        ;;
      m)
        MASTER_MODE=1
        ;;
      h)
        usage
        exit 0
        ;;
      *)
        # Invalid invocation: the help text is a diagnostic, so send it to stderr.
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed options; what remains is the optional "user [pass]".
shift $((OPTIND-1))
# Optional positional arguments left after option parsing: user, then password.
MYSQL_USERNAME="${1}"
MYSQL_PASSWORD="${2}"

# Base client options: connect timeout, batch mode (-B), no column names (-N).
EXTRA_ARGS="--connect-timeout=$TIMEOUT -B -N"

# Credentials are only appended when they were actually supplied.
[[ -z "$MYSQL_USERNAME" ]] || EXTRA_ARGS+=" --user=${MYSQL_USERNAME}"
[[ -z "$MYSQL_PASSWORD" ]] || EXTRA_ARGS+=" --password=${MYSQL_PASSWORD}"

# Option files are prepended, because the client appears to require them
# ahead of all other arguments. Unreadable files abort the check early.
if [[ -n "$DEFAULTS_FILE" ]]; then
  if [[ ! -r "$DEFAULTS_FILE" ]]; then
    echo "$0: error: defaults file $DEFAULTS_FILE not readable." >&2
    exit 1
  fi
  EXTRA_ARGS="--defaults-file=$DEFAULTS_FILE $EXTRA_ARGS "
fi

if [[ -n "$DEFAULTS_EXTRA_FILE" ]]; then
  if [[ ! -r "$DEFAULTS_EXTRA_FILE" ]]; then
    echo "$0: error: defaults extra file $DEFAULTS_EXTRA_FILE not readable." >&2
    exit 1
  fi
  EXTRA_ARGS="--defaults-extra-file=$DEFAULTS_EXTRA_FILE $EXTRA_ARGS "
fi

# Complete client invocation as one string; it is word-split where it is run.
MYSQL_CMDLINE="mysql ${EXTRA_ARGS}"
#######################################
# Consume the client's HTTP request headers from stdin.
# Irrelevant for haproxy, required for keepalived: the request has to be read
# before a response is sent. Reading stops at the first empty line or at EOF.
# A GET or OPTIONS request whose target starts with "/master" switches the
# check into master mode.
# Globals:   MASTER_MODE (written)
# Arguments: none (reads stdin)
#######################################
read_http_request() {
  local line method target
  # -r keeps backslashes sent by the client literal; the "|| -n" part also
  # processes a final line that is not newline-terminated.
  while read -r line || [[ -n "$line" ]]; do
    # https://stackoverflow.com/questions/369758/how-to-trim-whitespace-from-a-bash-variable
    # Remove trailing control characters (e.g. the CR of a CRLF line ending):
    # the inner expansion yields the trailing run of control characters,
    # the outer expansion cuts exactly that run off the end.
    line="${line%"${line##*[![:cntrl:]]}"}"
    log_debug "Client sent: \"===$line===\""
    if [[ -z "$line" ]]; then
      log_debug "Client sent empty line, breaking"
      break
    fi
    # Split into words with `read` rather than an unquoted `set -- $line`:
    # the latter glob-expands client-controlled text such as "OPTIONS * ...".
    method=""
    target=""
    read -r method target _ <<< "$line"
    # haproxy sends by default OPTIONS, keepalived sends GET
    if [[ "${method,,}" == "get" || "${method,,}" == "options" ]]; then
      if [[ "${target:0:7}" == "/master" ]]; then
        log_debug "Upgrading to master mode as requrested by /master URL."
        MASTER_MODE=1
      fi
    fi
  done
}

log_debug "Reading HTTP request ..."
read_http_request
log_debug "Done reading HTTP request."
# Clear the positional parameters; the command-line arguments are no longer needed.
set --
#######################################
# Extract the status values of interest from the MySQL client output.
# Globals:   wsrep_local_state, wsrep_cluster_status, wsrep_local_index,
#            read_only (reset to empty, then written)
# Arguments: $1 - client output, one "name<whitespace>value" row per line
#######################################
parse_mysql_status() {
  local name value
  # Start from a clean slate so values inherited from the environment can
  # never be mistaken for server state.
  wsrep_local_state=""
  wsrep_cluster_status=""
  wsrep_local_index=""
  read_only=""
  # `read` splits each row without glob-expanding it.
  while read -r name value _; do
    [[ -n "$name" ]] || continue
    case "$name" in
      wsrep_local_state|wsrep_cluster_status|wsrep_local_index|read_only)
        # printf -v assigns the global variable named by $name.
        printf -v "$name" '%s' "$value"
        ;;
      *)
        log_debug "unexpected output from MySQL: $name $value"
        ;;
    esac
  done <<< "$1"
}

#######################################
# Decide on the node's availability and send the HTTP response.
# Globals:   wsrep_local_state, wsrep_cluster_status, wsrep_local_index,
#            read_only, MASTER_MODE, AVAILABLE_WHEN_DONOR,
#            AVAILABLE_WHEN_READONLY (read)
# Arguments: none
# Returns:   does not return when output() exits the script, as it does here
#######################################
report_node_health() {
  local donor_ok=${AVAILABLE_WHEN_DONOR:-0}
  local readonly_ok=${AVAILABLE_WHEN_READONLY:-1}
  local usable=0

  # Usable means: part of the primary component and either synced (state 4)
  # or, if explicitly allowed, acting as donor (state 2).
  if [[ "$wsrep_cluster_status" == 'Primary' ]]; then
    if [[ "$wsrep_local_state" == 4 ]] \
      || [[ "$wsrep_local_state" == 2 && "$donor_ok" == 1 ]]; then
      usable=1
    fi
  fi

  if (( ! usable )); then
    output "503 Service Unavailable" "Percona XtraDB Cluster Node is not synced or non-PRIM." 1
  elif [[ "${MASTER_MODE}" == 1 ]]; then
    # String comparison on purpose: with -eq an empty (never reported)
    # wsrep_local_index evaluates to 0 and the node would claim to be master.
    # NOTE(review): as in the original flow, master mode answers before the
    # read_only policy is consulted - confirm that this is intended.
    if [[ "${wsrep_local_index}" == 0 ]]; then
      output "200 OK" "Percona XtraDB Cluster Node is synced and wsrep_local_index==0." 0
    else
      output "503 Service Unavailable" "Percona XtraDB Cluster Node is not wsrep_local_index==0 and you requested master mode." 1
    fi
  elif [[ "${read_only}" == "ON" && "$readonly_ok" == 0 ]]; then
    output "503 Service Unavailable" "Percona XtraDB Cluster Node is read_only and you requested AVAILABLE_WHEN_READONLY=0." 1
  else
    output "200 OK" "Percona XtraDB Cluster Node is synced." 0
  fi
}

log_debug "Calling MySQL..."
# MYSQL_CMDLINE is a whitespace-separated string; split it into words with
# `read -a` so that no word (e.g. a password containing '*') is glob-expanded.
read -r -a mysql_cmd <<< "$MYSQL_CMDLINE"
mysql_output=$("${mysql_cmd[@]}" -e 'SHOW GLOBAL STATUS WHERE Variable_name REGEXP "^(wsrep_local_state|wsrep_cluster_status|wsrep_local_index)$"; show global variables like "read_only";' 2>>"${ERR_FILE:-/dev/null}") \
  || log_debug "MySQL client exited with status $?"
log_debug "MySQL output: ===$mysql_output==="
parse_mysql_status "$mysql_output"
log_debug "After parsing: wsrep_local_state=$wsrep_local_state wsrep_cluster_status=$wsrep_cluster_status wsrep_local_index=$wsrep_local_index read_only=$read_only"
report_node_health