#!/bin/bash
#
# $Id$
#
# Checks available memory and free swap, the process with the largest
# resident set, the system load and the number of open files.
#

# prepare

# Shared montools setup. The add2* helpers and the threshold variables used
# later are not defined in this file, so they are presumably provided by
# this include -- confirm against mt-prepare.
. /usr/lib/montools/mt-prepare

# NOTE(review): not read anywhere in this file; presumably consumed by the
# sourced montools helpers.
RESNAME="RAM usage"

# Single-instance guard. With noclobber set, the redirection fails if the
# lock file already exists, so creating the file doubles as the lock test.
# The subshell keeps noclobber from leaking into the rest of the script.
LOCKFILE=/tmp/mt-check-ram.lck
if ( set -o noclobber; echo "Locked" > "$LOCKFILE") 2> /dev/null; then
  # Remove the lock on every exit path. The EXIT trap deliberately does not
  # call exit itself, so the script's real exit status is preserved
  # (running rm and then "exit $?" would always report rm's status).
  trap 'rm -f "$LOCKFILE"' EXIT
  # Turn signals into a regular exit so the EXIT trap above still runs;
  # 128 + signal number is the conventional status.
  trap 'exit 130' INT
  trap 'exit 143' TERM
else
  # Another run holds the lock (or left a stale one behind): report it and
  # stop without touching the lock file.
  echo "Failed to lock $LOCKFILE, aborting." >&2
  add2warnings \
    "mt-check-ram aborting on $HOSTNAME because $LOCKFILE still exists from former run"
  . /usr/lib/montools/mt-notify-exit
  exit 1
fi

# work

# Free Memory
#MEMFREE=$(free |head -3 |tail -1 |awk '{print $4}')

# mt_meminfo_kb KEY [FILE]
# Prints the numeric value of the "KEY:" line of FILE (default:
# /proc/meminfo). Prints nothing when FILE has no such line.
mt_meminfo_kb() {
  awk -v key="$1" '$1 == key ":" { print $2; exit }' "${2:-/proc/meminfo}"
}

MEMFREE=$(mt_meminfo_kb MemFree)
MEMAVAIL=$(mt_meminfo_kb MemAvailable)
SWAPFREE=$(mt_meminfo_kb SwapFree)

# Kernels older than 3.14 do not report MemAvailable. Fall back to MemFree
# so the comparison below still gets a number instead of an empty operand.
MEMAVAIL=${MEMAVAIL:-$MEMFREE}

# Low main memory is critical, and low swap on top of that is reported as a
# second critical. Low swap alone is only a warning.
if [ "$MEMAVAIL" -lt "$MINMEMFREE" ]; then
  add2criticals \
    "Memory low on $HOSTNAME: Mem available is $MEMAVAIL kb, expecting $MINMEMFREE kb"
  if [ "$SWAPFREE" -lt "$MINSWAPFREE" ]; then
    add2criticals \
      "Also, swap memory low on $HOSTNAME: Swap free is $SWAPFREE kb, expecting $MINSWAPFREE kb"
  fi
elif [ "$SWAPFREE" -lt "$MINSWAPFREE" ]; then
  add2warnings \
    "Warning: Swap memory low on $HOSTNAME: Swap free is $SWAPFREE kb, expecting $MINSWAPFREE kb. Main memory is ok, $MEMAVAIL kb available and $MEMFREE kb free above $MINMEMFREE kb wanted"
fi


# Memory usage
# Find the process with the largest resident set. grep prefixes every hit
# with its file name ("/proc/<pid>/status:VmRSS: <n> kB"), so the second
# whitespace-separated field is the size and the third "/"-separated field
# is the PID. Errors are discarded; a missing PID just leaves the values
# below empty.
MAXMEMPID=$(grep VmRSS /proc/*/status 2>/dev/null |sort -n -k2 |tail -1 |cut -f3 -d'/')
# Command line of that process, NUL separators turned into spaces and cut
# to 80 characters.
MAXMEMCMD=$(cat "/proc/$MAXMEMPID/cmdline" 2>/dev/null |tr '\0' ' ' |cut -c-80)
MAXMEMMEM=$(grep VmRSS "/proc/$MAXMEMPID/status" 2>/dev/null |awk '{print $2}')

# Per-run output directory (named after this script's PID). It is also used
# by the open-files check further down.
TMPDIR=/var/log/montools/ram-breakdowns/$$
if [ -d "${TMPDIR}" ] ; then
  echo "INFO: Removing old TMPDIR $TMPDIR ..." >&2
  rm -rfv "${TMPDIR}" >&2
fi
mkdir -p "${TMPDIR}"
chmod 0700 "${TMPDIR}"

# The leading 0 keeps the test well-formed when either value is empty.
if [ "0$MAXMEMMEM" -gt "0$MAXPROCMEM" ]
then
  add2criticals \
    "Heavy process with PID $MAXMEMPID using $MAXMEMMEM KB: $MAXMEMCMD" \
    "See ${TMPDIR}/heavyprocess_$MAXMEMPID"
  # If the vzpid tool is installed, append what it reports for the PID.
  if [ -x "/usr/sbin/vzpid" ] ; then
    add2criticals \
      "vzpid $MAXMEMPID says:" \
      "$(/usr/sbin/vzpid "$MAXMEMPID")"
  fi
  #cat ${TMPDIR}/ps-ef.out |grep $MAXMEMPID >${TMPDIR}/heavyprocess_$MAXMEMPID
  # Save the header, the process itself and its direct children. Matching
  # the PID/PPID columns exactly avoids the false hits a plain grep for the
  # number produces (other PIDs, timestamps, arguments, the grep itself).
  ps -ef |awk -v pid="$MAXMEMPID" 'NR == 1 || $2 == pid || $3 == pid' \
    >"${TMPDIR}/heavyprocess_$MAXMEMPID"
fi

# open files

# Guard for the lsof scan below: skip it when the machine is already
# overloaded. Only the integer part of the first field of /proc/loadavg
# (the 1-minute load average) is compared.
LOAD=$(cat /proc/loadavg)
CURLOAD=${LOAD%%.*}
if [ "$CURLOAD" -gt "$CRITICALLOAD" ] ; then
  # Report the threshold that was actually compared against.
  add2criticals \
     "Load over CRITICAL treshhold ${CRITICALLOAD}: ${CURLOAD}." \
     "Suppressing lsof count to protect system."
  . /usr/lib/montools/mt-notify-exit
  # Defensive, mirroring the lock-failure path: should the include return
  # instead of exiting, never fall through to the lsof scan.
  exit 1
fi

# Count the lines of lsof output (stderr merged in, "no pwd entry" lines
# dropped). tee keeps a copy so the per-command breakdown can be built from
# the same snapshot.
OPENFILES=$(lsof -l -n -P -b -w 2>&1 |grep -v "no pwd entry" |tee "${TMPDIR}/lsof.out" |wc -l)
# First column of each line, with the number of lines per distinct value.
cut -f1 -d' ' "${TMPDIR}/lsof.out" |sort |uniq -c >"${TMPDIR}/lsof.bybinary"
if [ "$OPENFILES" -gt "$MAXOPENFILES" ]
then
  # Double quotes so the real directory ends up in the message; the pattern
  # names the compressed files written just below.
  add2warnings \
    "There are $OPENFILES files open on $HOSTNAME" \
    "See ${TMPDIR}/lsof.*.evil.gz"
  gzip <"${TMPDIR}/lsof.out"      >"${TMPDIR}/lsof.out.evil.gz"
  gzip <"${TMPDIR}/lsof.bybinary" >"${TMPDIR}/lsof.bybinary.evil.gz"
fi
# The uncompressed working copies are never kept.
rm -f "${TMPDIR}/lsof.out" "${TMPDIR}/lsof.bybinary"


# Add an all-clear info only when no critical was recorded.
# NOTE(review): CRITICALS is presumably filled by add2criticals -- confirm.
if [ -z "$CRITICALS" ] ; then
  add2infos "RAM resources and open files seem to be within expected limits."
fi

# Remove empty directories below /var/log/montools (e.g. this run's output
# directory when nothing was written to it). A single scan avoids the race
# between two separate find runs, and -exec passes names to rmdir without
# word splitting. rmdir -v still reports each removal.
find /var/log/montools -depth -type d -empty -exec rmdir -v {} +

# notifications

. /usr/lib/montools/mt-notify-exit

