#!/bin/bash
# Authors:  Bartosz Kupidura (Mirantis):  Rewrite RA to support mysql/galera
#           Sergii Golovatiuk (Mirantis): Rewrite RA to support mysql/galera
#           Alan Robertson:               DB2 Script
#           Jakub Janczak:                rewrite as MySQL
#           Andrew Beekhof:               cleanup and import
#           Sebastian Reitenbach:         add OpenBSD defaults, more cleanup
#           Narayan Newton:               add Gentoo/Debian defaults
#           Marian Marinov, Florian Haas: add replication capability
#           Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
# Support:  openstack@lists.launchpad.net
# License:  GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
#     2005-2010 Linux-HA contributors
#     2014      Mirantis Inc.
#######################################################################
# Initialization:

# Source the shared OCF shell function file. Its directory may be overridden
# through OCF_FUNCTIONS_DIR; otherwise it is derived from OCF_ROOT.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Source the Fuel-specific function file the same way (overridable through
# OCF_FUEL_FUNCTIONS_DIR).
: ${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}
. ${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs

#######################################################################
# Built-in defaults. Every default is applied immediately after it is defined,
# and only when the corresponding variable is not already set by the caller.
: ${HA_LOGTAG="ocf-mysql-wss"}
: ${HA_LOGFACILITY="daemon"}

# Server launcher and the directory it lives in
OCF_RESKEY_binary_default="/usr/bin/mysqld_safe"
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
MYSQL_BINDIR="$(dirname ${OCF_RESKEY_binary})"

# Command line client
OCF_RESKEY_client_binary_default="/usr/bin/mysql"
: ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}

# Configuration file and data directory
OCF_RESKEY_config_default="/etc/mysql/my.cnf"
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
OCF_RESKEY_datadir_default="/var/lib/mysql"
: ${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}

# Account the daemon runs as
OCF_RESKEY_user_default="mysql"
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
OCF_RESKEY_group_default="mysql"
: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}

# Pid file and socket
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
: ${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}

# Credentials used for status queries
OCF_RESKEY_test_user_default="root"
: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}
OCF_RESKEY_test_passwd_default=""
: ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}

# Extra server options and the master election timeout (seconds)
OCF_RESKEY_additional_parameters_default=""
: ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}
OCF_RESKEY_master_timeout_default="300"
: ${OCF_RESKEY_master_timeout=${OCF_RESKEY_master_timeout_default}}

#######################################################################
# Convenience variables
MYSQL=$OCF_RESKEY_client_binary
HOSTNAME=$(uname -n)
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
# Status queries authenticate with the test user/password unless an extra
# defaults file (OCF_RESKEY_test_conf) is supplied, in which case it wins.
MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
if [ -n "${OCF_RESKEY_test_conf}" ]; then
    MYSQL_OPTIONS_TEST="--defaults-extra-file=${OCF_RESKEY_test_conf} ${MYSQL_OPTIONS_LOCAL}"
fi
#######################################################################
# Print a short help text describing the supported actions to stdout.
usage() {
    # Inside a function $0 still holds the script path
    local self="$0"

    cat <<USAGE_TEXT
usage: ${self} (start|stop|meta-data|validate-all|monitor)

${self} manages a MySQL Database as an HA resource.

The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'monitor' operation reports whether the database seems to be working
The 'validate-all' operation reports whether the parameters are valid

USAGE_TEXT
}
# Print the resource agent metadata XML to stdout.
# The heredoc delimiter is unquoted, so the ${OCF_RESKEY_*_default} references
# are expanded and the advertised defaults follow the values defined at the
# top of this file.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql" version="0.1">
  <version>0.1</version>
  <longdesc lang="en">
  Resource script for MySQL
  </longdesc>
  <shortdesc lang="en">Resource script for MySQL</shortdesc>
  <parameters>
    <parameter name="binary" unique="0" required="0">
      <longdesc lang="en">
      Location of the MySQL server binary
      </longdesc>
      <shortdesc lang="en">MySQL server binary</shortdesc>
      <content type="string" default="${OCF_RESKEY_binary_default}" />
    </parameter>
    <parameter name="client_binary" unique="0" required="0">
      <longdesc lang="en">
      Location of the MySQL client binary
      </longdesc>
      <shortdesc lang="en">MySQL client binary</shortdesc>
      <content type="string" default="${OCF_RESKEY_client_binary_default}" />
    </parameter>
    <parameter name="config" unique="0" required="0">
      <longdesc lang="en">
      Configuration file
      </longdesc>
      <shortdesc lang="en">MySQL config</shortdesc>
      <content type="string" default="${OCF_RESKEY_config_default}" />
    </parameter>
    <parameter name="datadir" unique="0" required="0">
      <longdesc lang="en">
      Directory containing databases
      </longdesc>
      <shortdesc lang="en">Data directory</shortdesc>
      <content type="string" default="${OCF_RESKEY_datadir_default}" />
    </parameter>
    <parameter name="user" unique="0" required="0">
      <longdesc lang="en">
      User running MySQL daemon
      </longdesc>
      <shortdesc lang="en">MySQL user</shortdesc>
      <content type="string" default="${OCF_RESKEY_user_default}" />
    </parameter>
    <parameter name="group" unique="0" required="0">
      <longdesc lang="en">
      Group running MySQL daemon (for logfile and directory permissions)
      </longdesc>
      <shortdesc lang="en">MySQL group</shortdesc>
      <content type="string" default="${OCF_RESKEY_group_default}"/>
    </parameter>
    <parameter name="pid" unique="0" required="0">
      <longdesc lang="en">
      The pidfile to be used for mysqld.
      </longdesc>
      <shortdesc lang="en">MySQL pid file</shortdesc>
      <content type="string" default="${OCF_RESKEY_pid_default}"/>
    </parameter>
    <parameter name="socket" unique="0" required="0">
      <longdesc lang="en">
      The socket to be used for mysqld.
      </longdesc>
      <shortdesc lang="en">MySQL socket</shortdesc>
      <content type="string" default="${OCF_RESKEY_socket_default}"/>
    </parameter>
    <parameter name="test_user" unique="0" required="0">
      <longdesc lang="en">
        MySQL test user, must have select privilege on 'show status'
      </longdesc>
      <shortdesc lang="en">MySQL test user</shortdesc>
      <content type="string" default="${OCF_RESKEY_test_user_default}" />
    </parameter>
    <parameter name="test_passwd" unique="0" required="0">
      <longdesc lang="en">
      MySQL test user password
      </longdesc>
      <shortdesc lang="en">MySQL test user password</shortdesc>
      <content type="string" default="${OCF_RESKEY_test_passwd_default}" />
    </parameter>
    <parameter name="additional_parameters" unique="0" required="0">
      <longdesc lang="en">
      Additional parameters which are passed to the mysqld on startup.
      (e.g. --skip-external-locking or --skip-grant-tables)
      </longdesc>
      <shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
      <content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
    </parameter>
    <parameter name="master_timeout" unique="0" required="0">
      <longdesc lang="en">
      How long we should wait for galera master. If master not come up before timeout,
      RA will choose new master from already running nodes. This value can by changed by crm_attribute:
      # crm_attribute --name galera_master_timeout --update 500
      Remember to remove this after maintenance. USE WITH CAUTION!
      Remember to change timeout for start operation. Start timeout should be bigger than master_timeout
      </longdesc>
      <shortdesc lang="en">Galera master timeout</shortdesc>
      <content type="integer" default="${OCF_RESKEY_master_timeout_default}"/>
    </parameter>
  </parameters>
  <actions>
    <action name="start"        timeout="330" />
    <action name="stop"         timeout="120" />
    <action name="monitor"      timeout="30" interval="20" depth="0" />
    <action name="meta-data"    timeout="5" />
    <action name="validate-all" timeout="10" />
  </actions>
</resource-agent>
END
}
# Convenience functions
#######################################################################
# Print the members of the current cluster partition on one line
# (an empty line when there are none).
nodes_in_cluster_online() {
    local LH="${LL} nodes_in_cluster_online():"
    local members

    members=$(crm_node --partition | sed -e '/(null)/d')
    if [ -z "$members" ]; then
        ocf_log info "${LH} No online nodes in cluster"
        echo
        return
    fi

    ocf_log info "${LH} Online Nodes in cluster: ${members}"
    # Deliberately unquoted: collapses the list onto a single line
    echo $members
}
# Print the names of all nodes listed by `crm_node --list` on one line
# (an empty line when there are none).
nodes_in_cluster() {
    # Log prefix names this function (it used to carry the sibling's name)
    local LH="${LL} nodes_in_cluster():"
    local NODES

    #Ubuntu doesn't like \w
    # The node name is taken from the second column of each listed entry
    NODES=$(crm_node --list | awk '/^[a-zA-Z0-9]/ {print $2}' | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        ocf_log info "${LH} Nodes in cluster: ${NODES}"
        # Deliberately unquoted: collapses the list onto a single line
        echo $NODES
    else
        ocf_log info "${LH} No nodes in cluster"
        echo
    fi
}

# Check that $1 looks like a Galera GTID; return 0 if it does, 1 otherwise.
# Accepted values are:
# XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:123 - standard cluster-id:commit-id
# XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:-1 - non-initialized cluster,
#                                           e.g. 00000000-0000-0000-0000-000000000000:-1
# The pattern comes from the global UUID_REGEX.
validate_gtid() {
    local LH="${LL} validate_gtid():"
    local status_loglevel="err"

    # Quoted so that values with whitespace or glob characters are tested
    # as-is instead of breaking the `[` builtin
    if [ -z "$1" ]; then
        ocf_log $status_loglevel "${LH} No GTID provided"
        return 1
    fi

    if ! printf '%s\n' "$1" | grep -q -E "${UUID_REGEX}"; then
        ocf_log $status_loglevel "${LH} GTID have wrong format: $1"
        return 1
    fi

    ocf_log info "${LH} GTID OK: $1"
    return 0
}

# Determine the Galera GTID of the local MySQL instance.
# If it differs from the value stored in the CIB and is well-formed, store it
# in the CIB, print it and return 0.
# If it is unchanged or has a bad value, print the GTID currently stored in
# the CIB and return 1.
update_node_gtid() {
    local LH="${LL} update_node_gtid():"
    local status_loglevel="err"
    local fresh_gtid
    local stored_gtid
    local state_uuid
    local last_committed

    # Set loglevel to info during probe
    ocf_is_probe && status_loglevel="info"

    if ! mysql_status $status_loglevel 1; then
        # Server is down: ask the binary to recover the position, and fall
        # back to the saved state file when nothing could be extracted
        fresh_gtid=$(${OCF_RESKEY_binary} --wsrep-recover 2>&1 | \
            grep -e 'Recovered position' -e 'wsrep_start_position' | grep -Eo "${UUID_REGEX}")
        if [ -z "${fresh_gtid}" ]; then
            fresh_gtid=$(cat ${OCF_RESKEY_datadir}/grastate.dat \
                | awk '/uuid/ { uuid = $NF} /seqno/ { seqno = $NF} END {print uuid":"seqno}')
        fi
    else
        # Server is up: build the GTID from its status variables
        state_uuid=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_local_state_uuid'" | awk '{print $NF}')
        last_committed=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_last_committed'" | awk '{print $NF}')
        fresh_gtid="$state_uuid:$last_committed"
    fi

    stored_gtid=$(get_node_gtid $HOSTNAME)

    # Nothing to publish when the value is unchanged or malformed
    if [ "${fresh_gtid}" = "${stored_gtid}" ] || ! validate_gtid "${fresh_gtid}"; then
        echo "${stored_gtid}"
        return 1
    fi

    ocf_log info "${LH} Galera GTID: ${fresh_gtid}"
    crm_attribute --quiet --node $HOSTNAME --lifetime reboot --name gtid \
        --update $fresh_gtid
    echo "${fresh_gtid}"
    return 0
}

# Record the current UTC epoch time in the crm_config attribute
# start_election_<hostname>, marking that this node entered an election.
start_election_attr()
{
    local LH="${LL} start_election_attr():"
    local time

    time=$(date -u +%s)
    ocf_log info "${LH} setting election start attribute time to ${time}"
    crm_attribute --quiet --type crm_config --node $HOSTNAME \
        --name start_election_${HOSTNAME} --update $time
}

# Delete the crm_config attribute start_election_<hostname>, i.e. withdraw
# this node's election marker.
clear_start_election_attr()
{
    local LH="${LL} clear_start_election_attr():"
    # No timestamp is needed for a delete, so `date` is not invoked here
    ocf_log info "${LH} dropping election start attribute"
    crm_attribute --quiet --type crm_config --node $HOSTNAME --name start_election_${HOSTNAME} --delete
}




# Flag this node as a Primary Component by setting the reboot-lifetime node
# attribute is_pc to 'true'.
update_node_pc()
{
    local LH="${LL} update_node_pc():"
    local flag='true'

    ocf_log info "${LH} Setting node PC flag to true"
    crm_attribute --quiet --node $HOSTNAME --lifetime reboot \
        --name is_pc --update "${flag}"
}

# Remove the reboot-lifetime node attribute is_pc from this node.
clear_node_pc()
{
    # Own log prefix; without it the caller's LH would be picked up through
    # bash's dynamic scoping and the message would be misattributed
    local LH="${LL} clear_node_pc():"
    ocf_log info "${LH} Cleaning up is_pc attribute"
    crm_attribute --quiet --node $HOSTNAME --lifetime reboot --name is_pc \
        --delete
}

# Print the master election timeout: the cluster attribute
# galera_master_timeout when present, else OCF_RESKEY_master_timeout
# (passed to crm_attribute as the default).
get_master_timeout() {
    local LH="${LL} get_master_timeout():"
    local raw_value
    local master_wait

    raw_value=$(crm_attribute --quiet --name galera_master_timeout \
        --query --default=$OCF_RESKEY_master_timeout -q)
    # Drop the "(null)" placeholder lines
    master_wait=$(printf '%s\n' "$raw_value" | sed -e '/(null)/d')

    ocf_log info "${LH} Setting timeout $master_wait"
    echo $master_wait
}

# Print the gtid attribute stored for node $1.
# "0" is printed when no GTID is set or the stored value has a wrong format.
get_node_gtid() {
    local LH="${LL} get_node_gtid():"
    local stored

    stored=$(crm_attribute --quiet --node $1 --lifetime reboot --query \
        --name gtid 2> /dev/null | sed -e '/(null)/d')

    # Only a non-empty, well-formed value is passed on to the caller
    if [ -n "$stored" ] && validate_gtid "$stored"; then
        ocf_log info "${LH} Galera GTID: ${stored}"
        echo $stored
        return
    fi

    ocf_log info "${LH} No GTID for $1"
    echo 0
}

# Print the GTID stored for node $1. When none is found ("0"), sleep for a
# random 1-10 seconds and try exactly once more, then print whatever the
# second attempt returned.
get_node_gtid_with_retry() {
    local GTID
    local NODE="$1"

    GTID=$(get_node_gtid "$NODE")

    if [ "$GTID" = "0" ]; then
        sleep $(( ( RANDOM % 10 ) + 1 ))
        GTID=$(get_node_gtid "$NODE")
    fi

    echo $GTID
}

# Decide whether a master election has to be held.
# Returns 0 ("election is needed") when this partition has quorum or the
# cluster has exactly one node, AND no instance of the resource is located
# anywhere. Returns 1 otherwise.
check_if_reelection_needed() {
    local LH="${LL} check_if_reelection_needed()"
    local PARTITION_WITH_QUORUM
    local RESOURCE_NAME
    local NODE_COUNT
    local RUNNING_INSTANCES

    PARTITION_WITH_QUORUM=$(crm_node -q | sed -e '/(null)/d')
    # Strip the clone instance suffix (everything from the first ':')
    RESOURCE_NAME=${OCF_RESOURCE_INSTANCE%%:*}
    NODE_COUNT=$(nodes_in_cluster | wc -w)

    # An empty or non-numeric quorum answer counts as "no quorum" instead of
    # breaking the whole test, so the single-node case is still evaluated
    if [ "${PARTITION_WITH_QUORUM:-0}" -eq 1 ] 2>/dev/null || [ "${NODE_COUNT:-0}" -eq 1 ]; then
        # crm_resource's own diagnostics are silenced; only the number of
        # reported locations matters
        RUNNING_INSTANCES=$(crm_resource \
            --quiet --locate --resource "$RESOURCE_NAME" 2> /dev/null | sed -e '/(null)/d' | wc -l)
        if [ "${RUNNING_INSTANCES:-0}" -lt 1 ]; then
            ocf_log info "${LH} Election is needed"
            return 0
        fi
    fi

    ocf_log info "${LH} Election was done"
    return 1
}

# Return 0 if a mysqld started with the new-cluster flag on our datadir is
# running (i.e. this node acts as a seed node), and mark the node as PC.
# Return 1 otherwise. Nothing is printed on stdout by this function itself.
check_if_new_cluster() {
    local LH="${LL} check_if_new_cluster()"
    local seed_pid

    # Match a mysqld pid by the datadir and a new cluster sign, exclude position recovery
    seed_pid=$(ps -C mysqld -o pid= -o command= -o args= | \
        grep -e "${OCF_RESKEY_datadir}.*wsrep-new-cluster" -e "wsrep-new-cluster.*${OCF_RESKEY_datadir}" | \
        awk '!/wsrep.recover|defunct/ {print $1}')

    if [ -z "${seed_pid}" ]; then
        ocf_log info "${LH} Running cluster"
        return 1
    fi

    update_node_pc
    ocf_log info "${LH} New cluster"
    return 0
}

# Pick the node that should bootstrap the cluster and print its name
# (an empty line when no node qualifies).
# Arguments: the candidate node names (any mix of separate words / one string)
# Algorithm:
#   1. read the stored GTID (uuid:seqno) of every candidate;
#   2. take the cluster uuid seen on the largest number of nodes, ignoring
#      unset ("0") and all-zero uuids; ties go to the smallest uuid;
#   3. among nodes carrying that uuid and having a positive allocation score
#      for the resource, choose the first one (in sorted name order) with the
#      highest seqno.
get_master() {
    local LH="${LL} get_master()"
    local NODES=$*
    local POSSIBLE_MASTERS
    local -A TMP
    local MASTER_GTID
    local GTID
    local NODE
    local NODE_SCORE
    local LATEST_SEQNO=-1
    local SEQNO
    local MASTER

    # Ensure the same nodes list to reach a consensus for the chosen master across all of the nodes
    NODES=$(printf -- '%s\n' ${NODES} | sort -u)
    # Form a hash of keys as node names, values as GTID:SEQNO
    for NODE in $NODES; do
        # Try and get a gtid with a retry when the GTID=0 to make sure there
        # is plenty of time if multiple nodes are starting at the same time.
        GTID=$(get_node_gtid_with_retry $NODE)
        TMP[$NODE]=$GTID
    done

    # Find possible masters
    # Cut the seqnums off the stored GTID:SEQNO pairs, then find the most seen GTID for the nodes.
    # The occurrence counts produced by `uniq -c` are sorted in descending
    # order (uuid as a deterministic tie-break) before the winner is taken.
    MASTER_GTID=$(printf -- '%s\n' ${TMP[@]%:*} | grep -vE -e "^0$" -e "$ZEROID" \
        | sort | uniq -c | sort -k1,1nr -k2,2 | awk '{print $2}' | head -1)
    [ "${MASTER_GTID}" ] || MASTER_GTID=$ZEROID
    ocf_log info "${LH} The most seen GTID is: ${MASTER_GTID}"
    for NODE in $NODES; do
        NODE_SCORE=$(crm_simulate -Ls | awk "/${OCF_RESOURCE_INSTANCE}/ && /clone_color/ && ! /${OCF_RESOURCE_INSTANCE}:/ && /${NODE}/ {print \$NF}")
        if [[ $NODE_SCORE =~ ^-?[0-9]+$ && $NODE_SCORE -le 0 || $NODE_SCORE = "-INFINITY" || -z $NODE_SCORE ]]; then
            ocf_log info "${LH} Skipping node $NODE as it is not eligible for running the resource. Its score is ${NODE_SCORE:-NULL}"
            continue
        fi
        ocf_log info "${LH} Node's ${NODE} score: ${NODE_SCORE}, GTID/SEQNUM: ${TMP[$NODE]}"
        # Filter node names with the most seen GTID as possible masters and find the latest SEQNO
        if [ "${MASTER_GTID}" = "${TMP[$NODE]%:*}" ]; then
            POSSIBLE_MASTERS="$POSSIBLE_MASTERS $NODE"
            SEQNO=${TMP[$NODE]#*:}
            [ "$SEQNO" -gt "$LATEST_SEQNO" ] && LATEST_SEQNO=$SEQNO
        fi
    done
    ocf_log info "${LH} Possible masters: $POSSIBLE_MASTERS"

    # Cut the gtids off the stored GTID:SEQNO pairs, then
    # filter the master, which is one who has the latest SEQNO from the possible masters
    for NODE in $POSSIBLE_MASTERS; do
        if [ "${LATEST_SEQNO}" = "${TMP[$NODE]#*:}" ]; then
            MASTER=$NODE
            break
        fi
    done
    # The array is only dereferenced when a master was found: an empty key is
    # an invalid associative-array subscript
    ocf_log info "${LH} Choosed master: ${MASTER} with GTID: ${MASTER:+${TMP[$MASTER]}}"
    echo "$MASTER"
}

# Find the best master among the online nodes and print its GTID.
# Returns 0 if the best master is this node (it proposes itself as the
# Primary Component).
# Returns 1 if another node is the master and no election is pending; in that
# case, if *this* node is also running a new cluster and more than one node
# is flagged is_pc, the script exits with OCF_ERR_GENERIC (split-brain).
# Returns 1 without printing anything when the master timeout expires.
check_if_galera_pc() {
    local LH="${LL} check_if_galera_pc():"
    local NODES
    local MASTER
    local timeout
    local GTID
    # NOTE(review): pid is never assigned in this function, so the split-brain
    # message below always prints an empty PID
    local pid
    local pcnum=0
    # Loop helpers are kept local so they do not leak into the global scope
    local node
    local is_pc

    timeout=$(get_master_timeout)

    ocf_log info "${LH} Checking if Primary Component"

    while [ $timeout -gt 0 ]; do
        NODES=$(nodes_in_cluster_online)
        MASTER=$(get_master "$NODES")
        GTID=$(get_node_gtid $MASTER)
        if [ "$MASTER" = "$HOSTNAME" ]; then
            ocf_log info "${LH} I'm Primary Component. Join me! My GTID: ${GTID}"
            echo "${GTID}"
            return 0
        else
            ocf_log info "${LH} I am not going to be primary component"
            clear_start_election_attr
        fi

        if ! check_if_reelection_needed; then
            ocf_log info "${LH} My neighbour is Primary Component with GTID: ${GTID}"
            if check_if_new_cluster; then
                # Count the nodes flagged as Primary Component; more than one
                # means two clusters were bootstrapped
                for node in ${NODES}; do
                    is_pc=$(crm_attribute --quiet --node ${node} --lifetime reboot --query --name is_pc | sed -e '/(null)/d')
                    if [ "${is_pc}" = "true" ]; then
                        pcnum=$((pcnum + 1))
                    fi
                    if [ ${pcnum} -gt 1 ]; then
                        ocf_log err "${LH} But I'm running a new cluster, PID:${pid}, this is a split-brain!"
                        clear_node_pc
                        exit $OCF_ERR_GENERIC
                    fi
                done
            fi
            echo "${GTID}"
            return 1
        fi

        # No master yet and an election is still needed: poll again
        sleep 10
        timeout=$((timeout - 10))
        ocf_log info "${LH} Waiting for master. ${timeout} seconds left"
    done

    ocf_log info "${LH} ${HOSTNAME} is not Primary Component"
    return 1
}
# Functions invoked by resource manager actions

# Validate the environment the agent needs.
# Returns OCF_SUCCESS when the binaries pass check_binary, the config file and
# the data directory exist, and the configured user and group are known to
# getent. Returns OCF_ERR_INSTALLED on the first failed check.
mysql_validate() {
    local LH="${LL} mysql_validate()"

    check_binary "$OCF_RESKEY_binary"
    check_binary "$OCF_RESKEY_client_binary"

    # Paths are quoted so that an empty value is reported as missing instead
    # of silently passing the test
    if [ ! -f "$OCF_RESKEY_config" ]; then
        ocf_log err "${LH} Config $OCF_RESKEY_config doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if [ ! -d "$OCF_RESKEY_datadir" ]; then
        ocf_log err "${LH} Datadir $OCF_RESKEY_datadir doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "${LH} User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent group "$OCF_RESKEY_group" >/dev/null 2>&1; then
        ocf_log err "${LH} Group $OCF_RESKEY_group doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}

# Detect a State Snapshot Transfer in progress.
# Arguments: $1 - log level for the verdict message (default: info)
# Returns OCF_SUCCESS when a mysqld using our datadir is running and a
# wsrep_sst_<method> process exists, OCF_ERR_GENERIC otherwise.
check_if_sst() {
    local LH="${LL} check_if_sst():"
    local loglevel=${1:-'info'}
    local pid
    local wsrep_sst_method
    local wsrep_sst_command
    local wsrep_sst_pid

    # Match a MySQLd pid by the datadir, exclude position recovery
    pid=$(ps -C mysqld -o pid= -o command= -o args= | grep "${OCF_RESKEY_datadir}" | \
        awk '!/wsrep.recover|defunct/ {print $1}')
    if [ "${pid}" ] ; then
        ocf_log info "${LH} MySQL process ${pid} found"
        # MySQLd's running and may be blocked, check for signs of SST.
        # The method is read from the config; both "key = value" and
        # "key=value" spellings are understood.
        wsrep_sst_method=$(awk -F= '/^wsrep_sst_method[[:space:]]*=/ {
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2); print $2 }' "${OCF_RESKEY_config}")
        wsrep_sst_command="wsrep_sst_${wsrep_sst_method}"
        wsrep_sst_pid=$(ps -C ${wsrep_sst_command} -o pid= -o command= | \
                        awk '!/defunct/ {print $1}' | head -1)
        if [ "${wsrep_sst_pid}" ]; then
            ocf_log $loglevel "${LH} SST is in progress"
            return $OCF_SUCCESS
        fi
    fi
    ocf_log $loglevel "${LH} No signs of SST found"
    return $OCF_ERR_GENERIC
}

# Report whether the MySQL server is running, based on its pid file.
# Arguments: $1 - log level for status messages (default: info)
#            $2 - how many times to look for the pid file (default: 3)
#            $3 - seconds to sleep after each failed look (default: 2)
# Returns OCF_SUCCESS when the pid file exists and /proc/<pid> is a
# directory, OCF_NOT_RUNNING otherwise.
mysql_status() {
    local LH="${LL} mysql_status():"
    local loglevel=${1:-'info'}
    local retries=${2:-3}
    local pause=${3:-2}
    local server_pid

    # Keep looking for the pid file until it shows up or the retries run out
    while [ $retries -gt 0 ] && [ ! -f "$OCF_RESKEY_pid" ]; do
        retries=$(( retries-1 ))
        ocf_log $loglevel "${LH} PIDFile ${OCF_RESKEY_pid} of MySQL server not found. Sleeping for $pause seconds. ${retries} retries left"
        sleep $pause
    done

    # Retries left over means the loop stopped because the file was found
    [ $retries -gt 0 ] && ocf_log info "${LH} MySQL PID found"

    if [ $retries -eq 0 ]; then
        ocf_log $loglevel "${LH} MySQL is not running"
        return $OCF_NOT_RUNNING
    fi

    server_pid=$(cat $OCF_RESKEY_pid)
    if [ "u$server_pid" != "u" ] && [ -d /proc/$server_pid ]; then
        ocf_log $loglevel "${LH} MySQL is running"
        return $OCF_SUCCESS
    fi

    ocf_log $loglevel "${LH} MySQL is not running"
    return $OCF_NOT_RUNNING
}

# Monitor action.
# Returns OCF_SUCCESS while an SST is in progress, or when the server is
# running, connected to the cluster and ready. Returns the mysql_status code
# when the server is not running, and OCF_ERR_GENERIC when a wsrep check fails
# or when this node is elected master with a GTID other than the one just
# published.
mysql_monitor() {
    local LH="${LL} mysql_monitor():"
    local rc
    # NOTE(review): rc2 is declared but not used in this function
    local rc2
    local status_loglevel="err"
    local WSREP_CONNECTED
    local WSREP_LOCAL_STATE_COMMENT
    local WSREP_READY
    local MGTID
    local GTID

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    # A node receiving/sending a state transfer is reported as healthy
    check_if_sst
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] && return $rc

    mysql_status $status_loglevel
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    # Publish the current GTID; the captured value is whatever
    # update_node_gtid prints (the new or the previously stored GTID)
    GTID=$(update_node_gtid)

    WSREP_CONNECTED=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE 'wsrep_connected'" | awk '{print $NF}')

    if [ "$WSREP_CONNECTED" != "ON" ]; then
        return $OCF_ERR_GENERIC
    fi

    WSREP_LOCAL_STATE_COMMENT=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE 'wsrep_local_state_comment'" | awk '{print $NF}')

    # In the Synced/Donor/Desync states the node must also report wsrep_ready
    if [[ "$WSREP_LOCAL_STATE_COMMENT" =~ 'Synced'|'Donor'|'Desync' ]]; then
        WSREP_READY=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_ready'" | awk '{print $NF}')

        if [ "$WSREP_READY" != "ON" ]; then
            ocf_log err "${LH} MySQL synced but not ready"
            return $OCF_ERR_GENERIC
        fi
    elif [[ "$WSREP_LOCAL_STATE_COMMENT" == 'Initialized' ]]; then
        ocf_log err "${LH} MySQL lost quorum or uninitialized"
        return $OCF_ERR_GENERIC
    fi

    # Check if this node is the master and is running the most recent GTID
    check_if_new_cluster
    # check_if_galera_pc runs in a subshell here, so an `exit` inside it only
    # ends the command substitution and shows up as a non-zero rc
    MGTID=$(check_if_galera_pc)
    rc=$?
    if [ $rc -eq 0 -a "${MGTID}" != "${GTID}" ]; then
        ocf_log err "${LH} I'm a master, and my GTID: ${GTID}, which was not expected"
        return $OCF_ERR_GENERIC
    fi
    ocf_log debug "${LH} MySQL monitor succeeded"
    return $OCF_SUCCESS
}

# Start action.
# Outline: return early if the server is already running; prepare the socket
# and pid directories; publish the node GTID; serialize master election with
# the other nodes through the start_election_<node> attributes; decide whether
# this node bootstraps a new cluster; launch the server in the background and
# wait until it is running or an SST is detected.
# Returns OCF_SUCCESS; exits with OCF_ERR_PERM when a directory is not
# writable by the configured user.
mysql_start() {
    local LH="${LL} mysql_start():"
    local NODES
    local socket_dir
    local pid_dir
    local rc
    local dir
    local mysql_extra_params

    if mysql_status info 1; then
        ocf_log info "${LH} MySQL already running"
        return $OCF_SUCCESS
    fi

    socket_dir="$( dirname ${OCF_RESKEY_socket} )"
    if [ ! -d "${socket_dir}" ] ; then
        ocf_log info "${LH} Create socket dir: ${socket_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
        mkdir -p "${socket_dir}"
        chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} "${socket_dir}"
        chmod 755 "${socket_dir}"
    fi

    # check and make PID file dir
    pid_dir="$( dirname ${OCF_RESKEY_pid} )"
    if [ ! -d "${pid_dir}" ] ; then
        ocf_log info "${LH} Create PID dir: ${pid_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
        mkdir -p "${pid_dir}"
        chown -R ${OCF_RESKEY_user}:${OCF_RESKEY_group} "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    # Regardless of whether we just created the directory or it
    # already existed, check whether it is writable by the configured
    # user
    for dir in $pid_dir $socket_dir; do
        if ! /usr/bin/sudo -n -u $OCF_RESKEY_user /usr/bin/test -w $dir; then
            ocf_log err "${LH} Directory $dir is not writable by $OCF_RESKEY_user"
            exit $OCF_ERR_PERM
        fi
    done

    # An init file, when present at this fixed path, is handed to the server
    if [ -f /tmp/wsrep-init-file ]; then
        mysql_extra_params="--init-file=/tmp/wsrep-init-file"
    else
        mysql_extra_params=""
    fi

    # Publish this node's GTID before the election.
    # NOTE(review): update_node_gtid also echoes the GTID; the output is not
    # captured here and therefore goes to this action's stdout
    update_node_gtid

    local timestamp
    local nodes_in_election

    NODES=$(nodes_in_cluster_online)

    # Critical section: proceed only when no online node holds an election
    # marker younger than the master timeout; otherwise back off and re-check.
    # NOTE(review): `node` and `election_attr` are not declared local
    ocf_log info "${LH} entering critical section on master election"
    while :;
    do
        nodes_in_election=0

        for node in ${NODES}; do
            timestamp=$( date -u +%s )
            election_attr=$(crm_attribute --quiet --node ${node} --type crm_config --query \
            --name start_election_${node} 2> /dev/null | sed -e '/(null)/d' | tr -d '[:space:]')

            # A missing attribute counts as timestamp 0, i.e. long expired
            election_attr=${election_attr:-0}
            ocf_log info "${LH} election attribute for node ${node} is ${election_attr} and time is ${timestamp}"
            if [ $(( ${timestamp} - ${election_attr} )) -gt $(get_master_timeout) ];
            then
                ocf_log info "${LH} election attribute for node ${node} is not valid."
                continue
            fi
            ocf_log info "${LH} election attribute for node ${node} is valid."
            ((nodes_in_election++))
        done

        if (( nodes_in_election == 0 ));
        then
            ocf_log info "${LH} there are no valid election attributes. safe to proceed."
            start_election_attr
            break
        fi
        ocf_log info "${LH} there is valid election attribute. sleeping for a while."
        # Random 1-10 second back-off
        sleep $(( ( $RANDOM % 10 )  + 1 ))
    done

    check_if_reelection_needed
    rc=$?

    # rc 0: an election is needed. If this node wins it, bootstrap a new
    # cluster; otherwise make sure the PC flag is not set on this node.
    if [ $rc -eq 0 ]; then
        check_if_galera_pc
        rc=$?

        if [ $rc -eq 0 ]; then
           mysql_extra_params="$mysql_extra_params --wsrep-new-cluster"
           update_node_pc
        else
           clear_node_pc
        fi
    else
        clear_start_election_attr
    fi

    ocf_log info "${LH} Starting MySQL"
    ${OCF_RESKEY_binary} \
        --pid-file=$OCF_RESKEY_pid \
        --socket=$OCF_RESKEY_socket \
        --datadir=$OCF_RESKEY_datadir \
        --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
    $mysql_extra_params >/dev/null 2>&1 &
    # NOTE(review): the command above is started with `&`, and bash reports
    # status 0 for an asynchronous command, so rc does not reflect the
    # server's own exit status and the failure branch below looks unreachable
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log err "${LH} MySQL start command failed: $rc"
        clear_node_pc
        clear_start_election_attr
        return $rc
    fi

    # Spin waiting for the server to come up or exit, if SST's in progress
    # Let the CRM/LRM time us out if required.
    while :; do
        check_if_sst
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break

        if mysql_status info 1; then
            break
        fi
        sleep 3
    done

    ocf_log info "${LH} MySQL started"
    clear_start_election_attr
    return $OCF_SUCCESS
}

# Remove everything a stopped instance leaves behind: the gtid and is_pc node
# attributes, the subsys lock file, the socket and the pid file.
mysql_cleanup() {
    local LH="${LL} mysql_cleanup():"
    local pid_content

    ocf_log info "${LH} Cleaning up gtid attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name gtid \
        --delete

    clear_node_pc
    ocf_log debug "${LH} Delete lock file: /var/lock/subsys/mysqld"
    rm -f /var/lock/subsys/mysqld

    # Paths are quoted so that values containing whitespace are removed too
    ocf_log debug "${LH} Delete sock file: ${OCF_RESKEY_socket}"
    rm -f -- "${OCF_RESKEY_socket}"

    # The pid file may already be gone; that is not worth an error message
    pid_content=$(cat -- "${OCF_RESKEY_pid}" 2>/dev/null)
    ocf_log debug "${LH} Delete pid file: ${OCF_RESKEY_pid} with content ${pid_content}"
    rm -f -- "${OCF_RESKEY_pid}"

}

# Stop action: terminate mysqld through proc_stop, clean up, and report
# OCF_SUCCESS.
# The shutdown budget is 15 seconds, or the operation timeout
# (OCF_RESKEY_CRM_meta_timeout, divided by 1000) minus 5 when that is set.
# proc_stop receives the pid file, a match pattern built from the datadir,
# SIGTERM, 5 and a fifth of the budget.
mysql_stop() {
    local LH="${LL} mysql_stop():"
    local shutdown_timeout=15
    local rc
    local stop_rounds

    ocf_log info "${LH}"

    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( ($OCF_RESKEY_CRM_meta_timeout/1000)-5 ))
    fi
    stop_rounds=$(( $shutdown_timeout/5 ))

    proc_stop "${OCF_RESKEY_pid}" "mysqld.*${OCF_RESKEY_datadir}" SIGTERM 5 $stop_rounds
    mysql_cleanup
    return $OCF_SUCCESS
}

##########################################################################
# If the DEBUG_LOG file already exists, is writable and is not a symlink, make
# this resource agent easy to debug: append stdout, stderr and an execution
# trace (set -x) to that file.  Otherwise the output is left untouched.
# The log directory should be owned by root with permissions 0700; note that
# this script itself does not verify the ownership or the mode.
##########################################################################
DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
# GTID pattern (cluster uuid, colon, seqno or -1) and the all-zero cluster uuid
UUID_REGEX="\w{8}-\w{4}-\w{4}-\w{4}-\w{12}:([[:digit:]]+|-1)"
ZEROID="00000000-0000-0000-0000-000000000000"
if [ -n "${DEBUG_LOG}" ] && [ -w "${DEBUG_LOG}" ] && [ ! -L "${DEBUG_LOG}" ]; then
    DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
    if [ ! -d "${DEBUG_LOG_DIR}" ]; then
        exec 9>/dev/null
    else
        # Append everything (stdout, stderr, trace) to the debug log via fd 9
        exec 9>>"$DEBUG_LOG"
        exec 1>&9 2>&9
        date '+%Y%m%d %H:%M:%S' >&9
        echo "$*" >&9
        env | grep OCF_ | sort >&9
        set -x
    fi
fi

# Informational actions are served before validation so that they work even
# on a node where MySQL is not installed.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit $OCF_SUCCESS
fi

export LL="${OCF_RESOURCE_INSTANCE}:"

mysql_validate
rc=$?
if [ $rc -ne 0 ]; then
    # On a failed validation a stop is reported as successful and a monitor
    # as "not running"; every other action gets the validation error
    if [ "$1" = "stop" ]; then
        exit $OCF_SUCCESS
    fi
    if [ "$1" = "monitor" ]; then
        exit $OCF_NOT_RUNNING
    fi
    exit $rc
fi

# What kind of method was invoked?
case "$1" in
    start)
        mysql_start
        ;;
    stop)
        mysql_stop
        ;;
    monitor)
        mysql_monitor
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
# vim: set ts=4 sw=4 tw=0 et :
