High Availability Linux/Unix
Creating a cluster. Pacemaker
csync2.
Csync2 is the configuration manager of the cluster.
The service maintains the same configuration for all the files involved in a cluster deployment.
Software installation.
# Refresh the package index first so every install below sees current packages.
apt-get update
# Each package is installed once (openssh-server was previously listed twice).
apt-get install openssh-server
apt-get install lynx
apt-get install gcc
apt-get install make
apt-get install csync2
apt-get install pacemaker
apt-get install xinetd
Configuration.
file: /etc/corosync/corosync.conf
# Settings in this file are described in the corosync.conf(5) manual page
# (the note originally here pointed at openais.conf.5).
totem {
version: 2
# How long before declaring a token lost (ms)
token: 3000
# How many token retransmits before forming a new configuration
token_retransmits_before_loss_const: 10
# How long to wait for join messages in the membership protocol (ms)
join: 60
# How long to wait for consensus to be achieved before starting a new round of membership configuration (ms)
consensus: 5000
# Turn off the virtual synchrony filter
vsftype: none
# Number of messages that may be sent by one processor on receipt of the token
max_messages: 20
# Limit generated nodeids to 31-bits (positive signed integers)
clear_node_high_bit: yes
# Disable encryption
secauth: off
# How many threads to use for encryption/decryption
threads: 0
# Optionally assign a fixed node id (integer)
# nodeid: 1234
# This specifies the mode of redundant ring, which may be none, active, or passive.
rrp_mode: none
interface {
# The following values need to be set based on your environment
# change these parameters to fit your machine network configuration.
ringnumber: 0
# NOTE(review): this is a single host address; confirm whether your corosync
# version expects the network address of the cluster interface here instead.
bindnetaddr: 192.168.0.101
# NOTE(review): 224.0.0.1 looks like the all-hosts multicast group; confirm
# that a dedicated multicast group address is not required for the cluster.
mcastaddr: 224.0.0.1
mcastport: 5605
}
}
# AMF section: mode is set to disabled.
amf {
mode: disabled
}
service {
# Load the Pacemaker Cluster Resource Manager
ver: 0
name: pacemaker
}
# User and group configured for the aisexec section: root/root.
aisexec {
user: root
group: root
}
# Logging: stderr and syslog (facility "daemon") are enabled, the log file is
# not; debug output and timestamps are switched on globally.
logging {
fileline: off
to_stderr: yes
to_logfile: no
to_syslog: yes
syslog_facility: daemon
debug: on
timestamp: on
# Per-subsystem override: debug is switched off for the AMF subsystem.
logger_subsys {
subsys: AMF
debug: off
tags: enter|leave|trace1|trace2|trace3|trace4|trace6
}
}
/etc/csync2.cfg
# Synchronization group "ha".
group ha
{
# Member hosts of the group.
host ubuntu ubuntu2;
# Key file used by this group.
key /etc/csync2.key_mygroup;
# Configuration files to keep synchronized.
include /etc/csync2.cfg;
# Executables (scripts) to keep synchronized.
# include /usr/sbin/LinkCheck;
# Directories to keep synchronized.
# include /shared_stuff/;
# include %homedir%/weibullguy;
# Excluded items.
# exclude *~ .*;
# exclude /shared_stuff/;
# What to do when /etc/csync2.cfg itself is synced: record the action in the
# csync2 action log. The mailer hook is commented out, so no e-mail is sent.
action
{
pattern /etc/csync2.cfg;
#exec "/etc/csync2/mailer";
logfile "/var/log/csync2_action.log";
do-local;
}
# Create backups.
# backup-directory /var/backups/csync2;
# backup-generations 3;
auto none;
}
# Disabled sample: a per-host "homedir" prefix. The hosts it refers to
# (host2/host3) are not members of the "ha" group above, so it stays
# commented out.
#prefix homedir
#{
# on host[23]: /home/users;
# on *: /home;
#}
HealthCheck
/etc/services
servermon 9999/tcp # cluster health-check service (see the xinetd "servermon" entry)
/etc/xinetd.d/servermon
# xinetd entry for the "servermon" service (port 9999/tcp in /etc/services).
# The configured server program is /usr/sbin/checkstatus, run as root.
service servermon
{
socket_type = stream
wait = no
user = root
server = /usr/sbin/checkstatus
disable = no
}
/usr/sbin/checkstatus
#!/bin/bash
# Health-check responder.
# Prints three key=value lines on stdout:
#   server_name=<contents of /etc/hostname>
#   timestamp=<YYYYmmddTHHMMSS>
#   network_stat=up|down   (result of a single ping to the reference address)
tss=$(date +%Y%m%dT%H%M%S)
host_conn="192.168.0.101"
name=$(cat /etc/hostname)
echo "server_name=$name"
echo "timestamp=$tss"
# One echo request with a one-second deadline. Both output streams are
# discarded so that ping diagnostics never end up mixed into the reply.
if ping -c 1 -w 1 "$host_conn" >/dev/null 2>&1; then
  echo "network_stat=up"
else
  echo "network_stat=down"
fi
/etc/ha/nodes.conf
nodes=ubuntu2 ubuntu3
/usr/sbin/nodes_stat
#!/bin/bash
# getParam KEY FILE
# Print the value of every "KEY=value" line in FILE.
#   - KEY is compared case-insensitively against the whole text before the
#     first "=" (surrounding blanks ignored), so a key never matches as a
#     substring of another key or of a value.
#   - The value is everything after the first "=", so it may itself contain "=".
# Lines without "=" are ignored. Prints nothing when the key is absent.
function getParam(){
  local param=$1
  local file=$2
  awk -v key="$param" '
    BEGIN { key = tolower(key) }
    {
      sep = index($0, "=")
      if (sep == 0) next
      name = substr($0, 1, sep - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (tolower(name) == key) print substr($0, sep + 1)
    }
  ' "$file"
}
# Probe TCP port 9999 on every node listed under "nodes" in
# /etc/ha/nodes.conf and print "<node>=up" or "<node>=down".
# Exit status: 0 when every node answered, 2 as soon as one node is down
# (nodes after the first failing one are not probed).
host_list=$(getParam nodes /etc/ha/nodes.conf)
# $host_list is left unquoted on purpose: it is a space-separated node list.
for host_n in $host_list
do
  # -w bounds the connect/idle time so an unreachable node cannot block the
  # whole check; 3 seconds leaves headroom for a slow reply.
  if nc -w 3 "${host_n}" 9999 > /dev/null
  then
    echo "${host_n}=up"
  else
    echo "${host_n}=down"
    exit 2
  fi
done
exit 0
Monitoring Watchdog
/etc/ha/service.conf
[monitor definition]
wdgd_log_dir=/var/log/
process_monitor=ssh
crash_notification=echo
monitor_services_ssh=/etc/init.d/ssh
monitor_services_option_ssh= restart
monitor_script_ssh=/usr/sbin/port_mon localhost 22
monitor_postaction_ssh=sleep 5
/usr/sbin/show_wdgd
#!/bin/bash
# Live view of one service's watchdog status file.
# Usage: show_wdgd <service>
# Clears the screen and prints /var/log/wdgd_<service> once per second until
# interrupted.
proc=${1:?usage: show_wdgd <service>}
while true
do
  clear
  cat "/var/log/wdgd_${proc}"
  sleep 1
done
/usr/sbin/port_mon
#!/bin/bash
# Usage: port_mon <host> <port>
# Exit 0 when a TCP connection to <host>:<port> can be opened within one
# second, 1 otherwise.
# -z only tests that the port accepts a connection: no data is exchanged, so
# nothing is printed and the probe does not depend on stdin.
# Options are placed before the operands so that every nc variant parses them.
if nc -z -w 1 "$1" "$2"
then
  exit 0
else
  exit 1
fi
/usr/sbin/wdgd_srv
#!/bin/bash
# Watchdog driver loop.
# Runs `watchdog` over and over for as long as the lock file exists and exits
# with status 1 once the lock file has been removed.
# Environment: WDGD_INTERVAL - seconds to pause between two passes (default 1).
lock_file="/tmp/lock_wdgd.lock"
interval="${WDGD_INTERVAL:-1}"
while [ -f "$lock_file" ]
do
  watchdog
  # Pause between passes so the loop does not spin at full speed.
  sleep "$interval"
done
# Lock file is gone: same exit status as before.
exit 1
/etc/init.d/wdgd
#!/bin/bash
# getParam KEY FILE
# Print the value of every "KEY=value" line in FILE.
#   - KEY is compared case-insensitively against the whole text before the
#     first "=" (surrounding blanks ignored), so a key never matches as a
#     substring of another key or of a value.
#   - The value is everything after the first "=", so it may itself contain "=".
# Lines without "=" are ignored. Prints nothing when the key is absent.
function getParam(){
  local param=$1
  local file=$2
  awk -v key="$param" '
    BEGIN { key = tolower(key) }
    {
      sep = index($0, "=")
      if (sep == 0) next
      name = substr($0, 1, sep - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (tolower(name) == key) print substr($0, sep + 1)
    }
  ' "$file"
}
# Init-script entry point for the watchdog service.
# Usage: wdgd start|debug|stop|status
#   start  - create the lock file and launch wdgd_srv in the background,
#            discarding its output
#   debug  - same, but send the output to <wdgd_log_dir>/wdgd_<timestamp>.log
#   stop   - remove the lock file (wdgd_srv exits once the file is gone)
#   status - print "running" or "stopped" depending on the lock file
# Exit status: 0 for the options above, 2 for anything else.
logDir=$(getParam wdgd_log_dir /etc/ha/service.conf)
tdate=$(date +%Y%m%dT%H%M%S)
wdg_home=/usr/sbin/
lock_file=/tmp/lock_wdgd.lock
case "$1"
in
  start)
    touch "$lock_file"
    "${wdg_home}wdgd_srv" >/dev/null 2>&1 &
    exit 0;;
  debug)
    logFile="${logDir}/wdgd_${tdate}.log"
    touch "$lock_file"
    # Same server as "start". stdout and stderr share a single open file so
    # their lines cannot overwrite each other.
    "${wdg_home}wdgd_srv" >>"${logFile}" 2>&1 &
    exit 0;;
  stop)
    # -f: stopping an already stopped service is not an error.
    rm -f "$lock_file"
    exit 0;;
  status)
    if [ -f "$lock_file" ]
    then
      echo "running"
    else
      echo "stopped"
    fi
    exit 0;;
  *)
    echo "please type one of the following options: start, debug, stop, status "
    exit 2;;
esac
/usr/sbin/watchdog
#!/bin/bash
# getParam KEY FILE
# Print the value of every "KEY=value" line in FILE.
#   - KEY is compared case-insensitively against the whole text before the
#     first "=" (surrounding blanks ignored), so a key never matches as a
#     substring of another key or of a value.
#   - The value is everything after the first "=", so it may itself contain "=".
# Lines without "=" are ignored. Prints nothing when the key is absent.
function getParam(){
  local param=$1
  local file=$2
  awk -v key="$param" '
    BEGIN { key = tolower(key) }
    {
      sep = index($0, "=")
      if (sep == 0) next
      name = substr($0, 1, sep - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (tolower(name) == key) print substr($0, sep + 1)
    }
  ' "$file"
}
###
# testMonScript SERVICE
# Look up monitor_script_<SERVICE> in /etc/ha/service.conf and check that the
# program it names (the first word of the configured command line) is an
# executable file.
# Output: the full configured command line when the program is executable,
#         the literal string NOT_FOUND otherwise (also when nothing is
#         configured).
function testMonScript()
{
  local nombre=$1
  local script_mon script_cut
  script_mon=$(getParam "monitor_script_${nombre}" /etc/ha/service.conf)
  # Extract the first word in the current shell. An assignment on the left
  # side of a pipe would be lost, leaving the variable empty.
  read -r script_cut _ <<<"$script_mon"
  if [ -n "$script_cut" ] && [ -x "$script_cut" ]
  then
    echo "$script_mon"
  else
    echo "NOT_FOUND"
  fi
}
###
# logService SERVICE MESSAGE
# Write "<date>:> MESSAGE" to <wdgd_log_dir>/wdgd_<SERVICE>, where
# wdgd_log_dir is read from /etc/ha/service.conf.
# The file is overwritten on every call, so it always holds only the most
# recent message for the service.
# All working variables are local, so a caller's own variables (for example
# "message" or "tdate") are left untouched.
function logService()
{
  local service_name=$1
  local message=$2
  local base_dir_log file_output tdate
  base_dir_log=$(getParam wdgd_log_dir /etc/ha/service.conf)
  file_output="${base_dir_log}/wdgd_${service_name}"
  tdate=$(date)
  echo "${tdate}:> ${message}" >"$file_output"
}
## Main pass of the watchdog: for every service named in process_monitor run
## its monitor command once; when the monitor fails, run the configured
## restart command, the post-action, and re-check. A failed restart is reported
## through the crash_notification command.
## Exit status: 5 when a service has no usable monitor script.
# Variables stay exported, as before, so the configured service, monitor and
# notification commands keep seeing them in their environment.
export srvlist=$(getParam process_monitor /etc/ha/service.conf)
echo "================================================================"
# $srvlist is left unquoted on purpose: it is a space-separated service list.
for service in $srvlist
do
  ## first we test if the service to monitor exists, else there is nothing to mon
  export process=$(getParam "monitor_services_${service}" /etc/ha/service.conf)
  export pars_p=$(getParam "monitor_services_option_${service}" /etc/ha/service.conf)
  echo "(*) launching monitor for: ${service}"
  # Quoted so that a missing/empty setting counts as "not executable".
  if [ ! -x "$process" ]
  then
    echo "file does not exist"
    continue
  fi
  echo "file exist $process"
  export test_monitor_scr=$(testMonScript "$service")
  echo "monitor script $test_monitor_scr"
  ## if the monitor is not found exit the process and show the error
  if [ "$test_monitor_scr" == "NOT_FOUND" ]
  then
    echo "monitor_script_$service value in service.conf is not executable or does not exist"
    exit 5
  fi
  ## the monitor exists and is an executable file
  echo "ready to lauch monitor: $test_monitor_scr"
  # Unquoted on purpose: the setting is a command line (program + arguments).
  $test_monitor_scr
  export status=$?
  if [ "$status" == "0" ]
  then
    ## the monitor returned 0 so the monitored process looks fine
    echo "monitor for $service ok"
    echo "----------------------->"
    logService "$service" "${service}=ok"
    continue
  fi
  ## the monitor returned something else than 0 so the process looks down
  logService "$service" "${service}=down"
  echo "*******************************************************************"
  echo "WARNING !!!! service $service is dead "
  echo "launching $process with the following params: $pars_p"
  export rescue_cmd="$process $pars_p"
  $rescue_cmd
  # Capture the restart command's status immediately, before any other
  # command overwrites $?.
  export stat_cm=$?
  logService "$service" "${service}=starting"
  echo "Command status: $stat_cm"
  export command_post_start=$(getParam "monitor_postaction_${service}" /etc/ha/service.conf)
  echo "launching post-startup command $command_post_start"
  $command_post_start
  if [ "$stat_cm" == "0" ]
  then
    ## check that the service really came back
    $test_monitor_scr
    export status_r=$?
    if [ "$status_r" == "0" ]
    then
      ## it did
      logService "$service" "${service}=started"
      echo "success"
    else
      ## it did not: log first, then build the notification text, so the
      ## text cannot be overwritten by the logging helper
      logService "$service" "${service}=ko"
      export ldate=$(date)
      export message="CRITICAL: service ${service} was not successfully started after automatic restart. time ( $ldate )"
      echo "$message"
      export crash_cmd=$(getParam crash_notification /etc/ha/service.conf)
      $crash_cmd "$message"
    fi
  else
    ## the restart command itself failed
    export ldate=$(date)
    export message="CRITICAL: service ${service} was not successfully started after crash. time ( $ldate )"
    echo "$message"
    export crash_cmd=$(getParam crash_notification /etc/ha/service.conf)
    $crash_cmd "$message"
    ## the post-action is run a second time on this path, as before
    export command_post_start=$(getParam "monitor_postaction_${service}" /etc/ha/service.conf)
    echo "launching post-startup command on fault $command_post_start"
    $command_post_start
  fi
  echo "*******************************************************************"
done
eDirectory Command Wrapper
#!/bin/bash
############# Variables
# Log level names and the level that is printed by default.
DEBUG="debug"
INFO="info"
DEFAULT_LOG_LEVEL="debug"
export DEBUG INFO DEFAULT_LOG_LEVEL
# Instance configuration path, read from the eDirectory instances file.
DIR_NDS=$(cat /etc/opt/novell/eDirectory/conf/.edir/instances.0)
export DIR_NDS
# Command lines used to stop and start the directory for that instance.
cmd_stop="/opt/novell/eDirectory/bin/ndsmanage stop --config-file ${DIR_NDS}"
cmd_start="/opt/novell/eDirectory/bin/ndsmanage start --config-file ${DIR_NDS}"
export cmd_stop cmd_start
# Test doubles, kept for reference:
#export cmd_stop="/root/test.sh stop ${DIR_NDS}"
#export cmd_start="/root/test.sh start ${DIR_NDS}"
#### exit 0
############ Functions
# Print how many lines of `ps -ef` mention "ndsd" (case-insensitive), not
# counting lines that contain "grep".
countProcesses() {
  proc=$(ps -ef | grep -i ndsd | grep -vc "grep")
  export proc
  echo "$proc"
}
# log LEVEL MESSAGE
# Print "<epoch seconds> [LEVEL]: MESSAGE " on stdout when LEVEL equals
# DEFAULT_LOG_LEVEL or is "info"; print nothing for any other level.
# Comparisons are quoted, so an empty or multi-word LEVEL is simply not
# printed instead of causing a test error. Working variables are local.
function log {
  local tlevel=$1
  local mesg=$2
  local level=$DEFAULT_LOG_LEVEL
  local fdat
  fdat=$(date +%s)
  if [[ "$tlevel" == "$level" ]]
  then
    echo "${fdat} [${tlevel}]: ${mesg} "
  elif [[ "$tlevel" == "info" ]]
  then
    echo "${fdat} [info]: ${mesg} "
  fi
}
# mailroot MESSAGE
# Send MESSAGE as the body of a mail to root.
# The text is passed through verbatim: no word splitting or glob expansion.
function mailroot {
  printf '%s\n' "$1" | mail root
}
##################### Main logic
# Usage: <script> start|stop|rep_t|rep_s|status
#   start  - run the start command, wait 5 s, succeed when at least one ndsd
#            process is counted
#   stop   - run the stop command, wait 10 s, then poll up to 5 more times
#            (5 s apart) until no ndsd process is counted
#   rep_t  - run `ndsrepair -T`
#   rep_s  - run `ndsrepair -E`
#   status - exit 0 when at least one ndsd process is counted, 3 otherwise
# Failures are mailed to root. An unknown option prints the usage and exits 2.
case "$1" in
  start)
    # Unquoted on purpose: cmd_start is a command line (program + arguments).
    $cmd_start
    stat_code=$?
    if [ "$stat_code" -eq 0 ]
    then
      log "$INFO" "ejecucion script arranque correcta"
    else
      log "$INFO" "ejecucion script arranque incorrecta"
      mailroot "El script ndsmanage ha mostrado un error."
      exit 1
    fi
    log "$INFO" "Esperando 5 segundos para comprobar los procesos de eDirectory"
    sleep 5
    num=$(countProcesses)
    log "$DEBUG" "contando procesos: $num"
    # "Running" means at least one process, the same rule "status" applies.
    if [ "$num" -ge 1 ]
    then
      log "$INFO" "el servicio ha arrancado correctamente"
      exit 0
    else
      mailroot "El servicio no se ha arrancado correctamente."
      exit 1
    fi
    ## END CASE START
    ;;
  stop)
    # Signal 2 (interrupt): report and leave with a distinctive status.
    trap 'echo "salida anormal";exit 126' 2
    log "$INFO" "se va a parar el servicio del directorio"
    $cmd_stop
    stat_code=$?
    if [ "$stat_code" -eq 0 ]
    then
      log "$INFO" "ejecucion script parada correcta"
    else
      log "$INFO" "ejecucion script parada incorrecta"
      mailroot "El script ndsmanage de eDirectory ha mostrado un error mientras se lanzaba la parada del directorio. Se va a abortar la copia"
      exit 1
    fi
    # The message states the real waiting time.
    log "$INFO" "Esperando 10 segundos para dar tiempo a la parada ordenada de eDirectory"
    sleep 10
    log "$INFO" "Lanzando comprobacion"
    num=$(countProcesses)
    log "$DEBUG" "contando procesos: $num"
    # Any remaining process (not only exactly one) means "not stopped yet".
    if [ "$num" -ge 1 ]
    then
      log "$INFO" "el servicio ndsd no se ha parado todavia"
      for try in 1 2 3 4 5
      do
        sleep 5
        log "$INFO" "comprobacion numero: $try "
        inum=$(countProcesses)
        if [ "$inum" -eq 0 ]
        then
          echo "el directorio ha parado correctamente"
          exit 0
        else
          echo "el directorio todavia no ha parado"
        fi
      done
      mailroot "El directorio no se ha parado. Abortamos copia de seguridad"
      exit 1
    else
      ## clean exit
      log "$INFO" "el servicio de directorio se ha parado. Devolvemos codigo de status correcto"
      exit 0
    fi
    ### END CASE STOP
    ;;
  rep_t)
    ndsrepair -T
    ;;
  rep_s)
    ndsrepair -E
    ;;
  status)
    inum=$(countProcesses)
    if [ "$inum" -eq 0 ]
    then
      echo "el directorio esta parado"
      exit 3
    else
      echo "el servicio de directorio esta arrancado"
      exit 0
    fi
    ### END STATUS
    ;;
  *)
    echo "se debe pasar un argumento de tipo start/stop/rep_t/rep_s/status"
    exit 2
    ;;
esac
No hay comentarios:
Publicar un comentario