Skip to content
Snippets Groups Projects
system_clock_monitor 10.3 KiB
Newer Older
#!/bin/bash 

# This script measures the drift between the local system clock and
# a Time of Day source (NTP, NMEA, IRIG-B)

# Directory holding the files produced by this script
tmpdir="/tmp"
# Cron file edited by cleanCronConfig and setCronConfig
cronFile="/etc/cron.d/root"
# Scratch copy of the cron file built by cleanCronConfig
tmpCronFile="$tmpdir/root.cron"
# Common prefix of the generated file names; also used as the logger tag
prefix="system_clock_monitor"
# NOTE(review): not referenced anywhere in the visible part of the script
debugLogFile="$tmpdir/$prefix.log"
# Path of this script as written in (and searched for in) the cron file
script="/etc/init.d/system_clock_monitor"
# File receiving the monitoring status keyword (written via writeMsg)
systemClockMonitoringStatus="$tmpdir/${prefix}_status"
# File receiving the measured drift (written via writeMsg)
systemClockMonitoringDrift="$tmpdir/${prefix}_drift"
# Configuration file sourced to obtain the CONFIG_* variables
dotConfig="/wr/etc/dot-config"
# File searched for the "ntpserver" entry
fileNtpServerConfig="/etc/wr_date.conf"
# Tool queried by read_ntp_server and looked up by killNTPD
ntpTool="/usr/sbin/ntpd"
# NOTE(review): not referenced anywhere in the visible part of the script
suspendKillDaemon=0
# PID of the background job started by killNTPD (0 when none is recorded)
pidKillDaemon=0
# Set to 1 to make debug() print its messages
verbose=0

#
# Write a message to a file, keeping the previous content as a backup
# $1: Message
# $2: Output file
#
# The current output file (if any) is renamed to "<output file>.old",
# replacing any older backup, then the message is written to a newly
# created output file.
#
writeMsg() {
  local msg=$1
  local of=$2
  local oft="$of.old"

  # If an old backup exists then remove it, so that no outdated ".old"
  # file survives when there is no current file to rotate
  if [ -f "$oft" ] ; then
    rm -f "$oft"
  fi
  # If the file exists then rename it
  if [ -f "$of" ] ; then
    mv "$of" "$oft"
  fi
  # Create the file. printf is used instead of echo so that a message
  # looking like an echo option (e.g. "-n") is written verbatim.
  printf '%s\n' "$msg" > "$of"
}

#
# Print a debug message through the log pipe when verbose is set
# $1: Message. It is re-parsed by "eval", so callers have to escape the
#     shell metacharacters (parentheses, double quotes, ...) it contains.
#
debug () {
	# Silent unless the global "verbose" is 1
	[[ $verbose == 1 ]] || return 0
	eval echo $1 $LOGPIPE 
}

#
# Remove from the cron file any entry related to the system clock monitor
#
# Every line of $cronFile containing the path $script is dropped. When at
# least one line was dropped, the original file is kept as "$cronFile.old"
# and replaced by the filtered copy; otherwise the cron file is untouched.
# Globbing is disabled while the function runs and enabled on return.
#
cleanCronConfig ()
{
	local entryPresent=0
	local line

	set -f
	: > "$tmpCronFile"
	# A missing cron file simply has nothing to clean
	if [[ -r "$cronFile" ]] ; then
		# "|| [[ -n $line ]]" keeps a last line lacking its trailing
		# newline, which "read" reports as a failure
		while IFS= read -r line || [[ -n "$line" ]] ; do
			# Literal substring match: the path is not a regular expression
			if [[ "$line" == *"$script"* ]] ; then
				entryPresent=1
			else
				printf '%s\n' "$line" >> "$tmpCronFile"
			fi
		done < "$cronFile"
	fi
	if (( entryPresent == 1 )) ; then
		debug "$cronFile has been cleaned up"
		mv "$cronFile" "$cronFile.old"
		mv "$tmpCronFile" "$cronFile"
	else
		debug "$cronFile  does not need to be cleaned up "
		rm -f "$tmpCronFile"
	fi
	set +f
}

#
# Add system clock monitor entry in crontab
# $1: Entry (one or more lines) appended to $cronFile
#
setCronConfig ()
{
	local __entry=$1

	debug "Add new entry in cron file $cronFile"
	debug "New entry added \"$__entry\""
	# Append the entry received as argument, not a caller's global variable
	printf '%s\n' "$__entry" >> "$cronFile"
}

#
# Compare the offset to the threshold value
# $1: Name of the variable receiving the result
#     (1: threshold reached or exceeded, 0: within the threshold)
# $2: Offset, integer number of seconds, optionally signed
# $3: Threshold, integer number of seconds
#
compareToThreshold () 
{
	local  __resultvar=$1
	# $(( )) to remove leading "+"
	local  __c_offset=$(( $2 ))
	local  __c_threshold=$3
	local  __c_excess

	if (( __c_offset >= __c_threshold )) ; then
		__c_excess=$(( __c_offset - __c_threshold ))
		debug "X1System clock drift is exceeding the threshold by ${__c_excess} sec"
		printf -v "$__resultvar" '%s' 1
	elif (( -__c_offset >= __c_threshold )) ; then
		# Negative drift: the excess is the magnitude of the offset
		# beyond the threshold, hence a positive number
		__c_excess=$(( -__c_offset - __c_threshold ))
		debug "X2System clock drift is exceeding the threshold by ${__c_excess} sec"
		printf -v "$__resultvar" '%s' 1
	else
		debug "System clock drift is not exceeding the threshold"
		printf -v "$__resultvar" '%s' 0
	fi
}

#
# Decode NTPD daemon output to get the offset in seconds.useconds
# $1: Name of the variable receiving the decoded offset
# $2: Output of the ntpd query
# Returns 0 when an offset was decoded, 1 otherwise (the result variable
# is then left untouched).
#
decodeOffset ()
{
	local __resultvar=$1
	local __str=$2
	# $__str is left unquoted on purpose: word splitting joins a multi-line
	# output into a single line, so that the sed expression prints at most
	# one offset.
	local __offset=$(echo $__str | sed -n 's/.* offset:\?\([+-][0-9]*\)\.\?\([0-9]*\) .*/\1.\2/p')
	
	if [[ -z "$__offset" ]] ; then
		# Empty string
		debug "Invalid Offset !!!"
		debug "NTP msg=\"$__str\""
		return 1
	else
		# Change the sign of the offset. ntpd returns a positive
		# offset when the ntp time is ahead of the local time, which is
		# counter-intuitive.
		if [ "${__offset:0:1}" = "-" ]; then
			__offset=+${__offset:1}
		elif [ "${__offset:0:1}" = "+" ]; then
			__offset=-${__offset:1}
		fi
		debug "NTP offset=$__offset s"
		printf -v "$__resultvar" '%s' "$__offset"
		return 0
	fi
}
#
# Kill the NTPD daemon in background after a few seconds
# $1: Delay in seconds before the kill is attempted
#
# A background subshell sleeps for the delay, then kills the processes
# reported by pgrep for $ntpTool. Its PID is recorded in pidKillDaemon so
# that the next call can kill a subshell that is still recorded.
#
killNTPD () 
{
	local delay=$1 # Delay in seconds
	
	# A background job from a previous call is still recorded: kill it
	if (( $pidKillDaemon != 0 )) ; then  
		debug "Daemon actif !!!" 
		kill -9 $pidKillDaemon &>/dev/null
		pidKillDaemon=0
	fi
	( 
		sleep $delay
		# Search for a ntpd only within this script's children
		# NOTE(review): $pgid is not set in the visible part of the
		# script - confirm where it is defined
		p=$(pgrep -f -g $pgid $ntpTool)
		if [[ -n "$p" ]] ; then
			kill -9 $p &>/dev/null
		fi
		# This assignment happens inside the subshell, so it does not
		# change pidKillDaemon in the calling shell
		pidKillDaemon=0
	) &
	# Remember the PID of the background subshell
	pidKillDaemon=$!
}

#
# Read the NTP server to get the offset between NTP and local system time
# $1: Name of the variable receiving the result
#     (0: offset read and reported, 1: no offset after all the retries)
# $2: Drift threshold in seconds
# $3: NTP server(s); comma, semicolon, space and tab are accepted as
#     delimiters
#
# On success the status file receives "exceeded_threshold" or "no_error"
# and the drift file receives the measured offset.
#
read_ntp_server()
{
	local __result=$1
	local ltThreshold=$2
	local server=$3
	local retries=2
	local offset=-1
	local alarmState=0
	local ntpRes
	local i
	local -a servers=()
	local -a server_args=()

	# Split the server list without touching the global IFS and without
	# exposing the names to globbing
	IFS=$',; \t' read -r -a servers <<< "$server"
	for i in "${servers[@]}"; do
		# skip the empty fields produced by repeated delimiters
		[[ -n "$i" ]] || continue
		# assemble the list of servers, each one prefixed with -p
		server_args+=(-p "$i")
	done

	debug "NTP server= ${server_args[*]}"
	for (( i = 1; i <= retries; i++ )) ; do # Manual retries
		# Arm the watchdog killing a ntpd that would never return
		killNTPD 10
		ntpRes=$("$ntpTool" -n -w -q -d "${server_args[@]}" 2>&1)
		if [ -n "$ntpRes" ] ; then
			if decodeOffset offset "$ntpRes" ; then
				# Only the integer part of the offset is compared
				compareToThreshold alarmState "${offset%.*}" "$ltThreshold"
				if (( alarmState == 1 )) ; then
					# Exceeded Threshold
					writeMsg "exceeded_threshold" "$systemClockMonitoringStatus"
				else
					writeMsg "no_error" "$systemClockMonitoringStatus"
				fi
				writeMsg "$offset" "$systemClockMonitoringDrift"
				eval $__result="0"
				return
			fi
			eval echo "Retry $i/$retries : Cannot extract offset from NTP message." $LOGPIPE
		else
			eval echo "Retry $i/$retries : NTP query failed, unable to contact server\(s\) \($server\)." $LOGPIPE
		fi
	done
	eval echo "ERROR: could not reach NTP server\(s\) \($server\) after $retries retries" $LOGPIPE
	eval $__result="1"
}

#
# Decode the "SW(UTC)-<type>(UTC): <offset>" field of a Time of Day report
# $1: Name of the variable receiving the decoded offset
# $2: Report to decode
# $3: Time of Day type as it appears in the report (e.g. NMEA)
# Returns 0 when an offset was decoded, 1 otherwise (the result variable
# is then left untouched).
#
decodeOffset_wr_date ()
{
	local __resultvar=$1 __str=$2 __tod_type=$3
	local __offset=$(echo $__str | sed -n "s/.*SW(UTC)-${__tod_type}(UTC): \?\([+-][0-9]*\)\.\?\([0-9]*\).*/\1.\2/p")

	# Offset found: report it to the caller and stop here
	if [[ -n "$__offset" ]] ; then
		debug "$__tod_type offset=$__offset s"
		eval $__resultvar="'$__offset'"
		return 0
	fi

	# Nothing matched the expected field
	debug "Invalid Offset !!!"
	debug "$__tod_type msg=\"$__str\""
	return 1
}

#
# Query a Time of Day source for the offset between it and the local
# system time
# $1: Name of the variable receiving the result
#     (0: offset read and reported, 1: no offset after all the retries)
# $2: Drift threshold in seconds
# $3: Name of the Time of Day source (e.g. NMEA, IRIG-B)
#
# On success the status file receives "exceeded_threshold" or "no_error"
# and the drift file receives the measured offset.
#
read_tod()
{
	local __result=$1
	local ltThreshold=$2
	local todName=$3
	local todKey=${todName//-/}   # name without dash, as found in the report
	local todArg=${todKey,,}      # lower case name, given to the tool
	local attempts=2
	local offset=-1
	local statusMsg

	debug "read ${todName} \(${todKey}, ${todArg}\)"
	for i in $(seq $attempts) ; do # Manual retries
		wr_date_res=$($wr_dateTool $todArg diff -v)

		# No answer at all: try again
		if [ -z "$wr_date_res" ] ; then
			eval echo "Retry $i/$attempts : ${todName} query failed, unable to get Time of Day." $LOGPIPE
			continue
		fi

		# Answer without a usable offset: try again
		if ! decodeOffset_wr_date offset "$wr_date_res" "$todKey" ; then
			eval echo "Retry $i/$attempts : Cannot extract offset from ${todName}." $LOGPIPE
			continue
		fi

		# Only the integer part of the offset is compared
		compareToThreshold alarmState ${offset%.*} $ltThreshold
		if (( $alarmState == 1 )) ; then
			statusMsg="exceeded_threshold"
		else
			statusMsg="no_error"
		fi
		writeMsg "$statusMsg" $systemClockMonitoringStatus
		writeMsg  $offset $systemClockMonitoringDrift
		eval $__result="0"
		return
	done
	eval echo "ERROR: could not get Time of day via ${todName} after $attempts retries" $LOGPIPE
	eval $__result="1"
}

#
# Apply dot-config configuration
#
if [ -f $dotConfig ]; then
	# source dot-config
	. $dotConfig
else
	echo "$0 unable to source dot-config ($dotConfig)!"
fi

WRS_LOG=$CONFIG_WRS_LOG_OTHER

# No log destination configured: discard the messages
if [ -z $WRS_LOG ]; then
	WRS_LOG="/dev/null"
fi

# LOGPIPE is the redirection suffix appended to the "eval echo ..." commands
case "$WRS_LOG" in
	*/*)
		# a pathname: redirect the messages to it
		LOGPIPE=" > $WRS_LOG 2>&1 "
		;;
	default_syslog)
		# syslog with the priority taken from the message prefix
		LOGPIPE=" 2>&1 | logger -t $prefix --prio-prefix -p daemon.info"
		;;
	*)
		# not a pathname: use it verbatim as the syslog priority
		LOGPIPE=" 2>&1 | logger -t $prefix -p $WRS_LOG"
		;;
esac

debug "Script started with options \\\"$*\\\""

# Option "-s": (re)generate the cron configuration, then exit
if [ "$#" -eq 1 ] && [ "$1" == "-s" ] ; then

	debug "Setup configuration"
	cleanCronConfig

	if [ "$CONFIG_SNMP_SYSTEM_CLOCK_MONITOR_ENABLED" != "y" ] ; then
		# System clock monitor disabled. Make a clean up
		rm -f  $systemClockMonitoringStatus
		rm -f  $systemClockMonitoringDrift
		exit 0
	fi

	# System clock monitor enabled. Setup cron file
	set -f # Disable globbing: the cron schedule contains "*"
	NEWLINE=$'\n'

	# Select the interval, its unit and the matching cron schedule
	if [ "$CONFIG_SNMP_SYSTEM_CLOCK_UNIT_MINUTES" = "y" ] ; then
		intervalValue=$CONFIG_SNMP_SYSTEM_CLOCK_CHECK_INTERVAL_MINUTES
		intervalUnit="minute"
		cronSchedule="*/${intervalValue} * * * *"
	elif [ "$CONFIG_SNMP_SYSTEM_CLOCK_UNIT_HOURS" = "y" ] ; then
		intervalValue=$CONFIG_SNMP_SYSTEM_CLOCK_CHECK_INTERVAL_HOURS
		intervalUnit="hour"
		cronSchedule="0 */${intervalValue} * * *"
	elif [ "$CONFIG_SNMP_SYSTEM_CLOCK_UNIT_DAYS" = "y" ] ; then
		intervalValue=$CONFIG_SNMP_SYSTEM_CLOCK_CHECK_INTERVAL_DAYS
		intervalUnit="day"
		cronSchedule="0 0 */${intervalValue} * *"
	else
		eval echo "Invalid unit for system clock check interval." $LOGPIPE
		writeMsg "config_error" $systemClockMonitoringStatus
		writeMsg "0" $systemClockMonitoringDrift
		exit 1
	fi

	# The entry is a comment line followed by the cron line itself
	debug "Time interval: ${intervalValue} ${intervalUnit}s"
	entry="# System clock monitor: Execute the script \"${script}\" every ${intervalValue} ${intervalUnit}(s)${NEWLINE}"
	entry+="${cronSchedule}  ${script}"
	setCronConfig "$entry"

	set +f # Re-enable globbing
	eval echo "cron file \"root\" configured" $LOGPIPE
	exit 0
fi

# Nothing to measure when the monitor is not enabled in dot-config
[ "$CONFIG_SNMP_SYSTEM_CLOCK_MONITOR_ENABLED" = "y" ] || exit 1


threshold=$CONFIG_SNMP_SYSTEM_CLOCK_DRIFT_THOLD

# A drift threshold is mandatory: report a configuration error without it
[ -n "$threshold" ] || {
	eval echo "System clock drift threshold not set." $LOGPIPE
	writeMsg "config_error" $systemClockMonitoringStatus
	writeMsg "0" $systemClockMonitoringDrift
	exit 1
}

if [ "$CONFIG_TOD_SOURCE_NTP" = "y" ] ; then

	ntpServer=""
	# Get the NTP server
	if [ -f "$fileNtpServerConfig" ]; then
		# pick the first "ntpserver" line, if any, and strip the keyword
		ntpServer=$(grep 'ntpserver' "$fileNtpServerConfig" | sed 's/ntpserver//' | head -n 1)
	fi
	# Without a server there is nothing to query: configuration error
	if [ -z "$ntpServer" ]; then
		eval echo "Empty NTP server name" $LOGPIPE
		writeMsg "config_error" $systemClockMonitoringStatus
		writeMsg "0" $systemClockMonitoringDrift
		exit 1
	fi
	read_ntp_server result $threshold "$ntpServer"
	# No offset could be read from the server(s)
	if (( result != 0 )) ; then
		writeMsg "ntp_error" $systemClockMonitoringStatus
		writeMsg "0" $systemClockMonitoringDrift
		exit 1
	fi

elif [ "$CONFIG_TOD_SOURCE_NMEA" = "y" ] ; then
	read_tod result $threshold "NMEA"
	if (( result != 0 )) ; then
		writeMsg "nmea_error" $systemClockMonitoringStatus
		writeMsg "0" $systemClockMonitoringDrift
		exit 1
	fi

elif [ "$CONFIG_TOD_SOURCE_IRIGB" = "y" ] ; then
	read_tod result $threshold "IRIG-B"
	if (( result != 0 )) ; then
		writeMsg "irigb_error" $systemClockMonitoringStatus
		writeMsg "0" $systemClockMonitoringDrift
		exit 1
	fi