#! /bin/bash
#
# chkconfig: 345 05 47
# description: ProcMgr is an Arastra tool that keeps our daemons running.
# processname: ProcMgr
# config: /etc/ProcMgr.conf

# Note: Please do not add a 'pidfile:' Red Hat header to the ProcMgr SysV
# init script, so that systemd does not learn the main PID of ProcMgr.
# Otherwise, on finding a pidfile, systemd sets RemainAfterExit=no for the
# service, and as soon as the main PID is killed systemd tries to kill all
# the processes managed by that main PID. As a result, running
# "service ProcMgr stoppm" would end up calling "service ProcMgr stop",
# and "service ProcMgr stop" ensures that ProcMgr kills itself and its
# managed processes by clearing the subdirectories under
# "/etc/ProcMgr.d/run".


# NOTE(review): presumably read by the sourced init functions so that running
# this script is not redirected to systemctl - confirm against
# /etc/rc.d/init.d/functions.
export SYSTEMCTL_SKIP_REDIRECT=1

# Pull in the shared init-script helpers. The success, failure, status and
# logEosStarted commands used below are not defined in this file; presumably
# they come from here - TODO confirm.
. /etc/rc.d/init.d/functions

# Keep only the last word printed by the 'runlevel' command: inside the
# subshell its output becomes the positional parameters and "\$$#" expands to
# the last of them.
runlevel=$(set -- $(runlevel); eval "echo \$$#" )

# start: launch ProcMgr unless it is already running.
#
# On a cell whose /etc/celltype contains 'manufacturing' this reports success
# and exits the whole script with status 0 without starting anything.
# A non-root caller only gets RETVAL=1. As root, the agent environment is set
# up (core file limit, TMPDIR, stdout dump dir, quicktrace dir, allocator and
# OOM settings) and ProcMgr is launched either through /usr/bin/netnsd (when
# the fastload toggle is non-zero) or directly with 'ProcMgr --daemonize'.
# A successful launch touches /var/lock/subsys/ProcMgr.
#
# Globals:  RETVAL (written), FASTLOAD (written), PROCMGR (written on the
#           fastload path); several variables are exported to child processes.
# Returns:  RETVAL - 0 on success or when ProcMgr was already running,
#           non-zero otherwise.
start() {
    echo -n $"Starting ProcMgr: "

    grep 'manufacturing' /etc/celltype >> /dev/null 2>&1
    if [ $? -eq 0 ]; then 
        # For manufacturing, no need to run ProcMgr when EOS is disabled.
        echo -n "Manufacturing cell detected."
        success
        echo
        # This ends the whole script, not just this function.
        exit 0
    fi

    # Only root goes on to start ProcMgr; anyone else just gets a failure.
    if [ $UID -ne 0 ]; then
        RETVAL=1
    else
        ulimit -c unlimited
        export DAEMON_COREFILE_LIMIT=unlimited
        export TMPDIR=/var/tmp/agents/
        export PROCMGR_STDOUTDUMPDIR=/var/log/agents/
        export TERM=dumb
        export QUICKTRACEDIR=/var/log/qt
        mkdir -p $QUICKTRACEDIR
        # Setting GLIBCXX_FORCE_NEW prevents stdlibc++ in all our agents from using
        # type-specific allocators for STL container types (which waste a lot of
        # memory to achieve a dubious performance gain).
        export GLIBCXX_FORCE_NEW=1
        # disable the OOM killer. The oom_score_adj value is inherited in all child
        # processes, but netnsd-server, netnsd-watcher, and netnsd-session are all
        # pretty lightweight, so it should be okay.
        echo -n "-1000" > /proc/self/oom_score_adj
        # Ask the ProcMgr toggle library whether fastload is enabled (the
        # snippet uses the Python 2 print statement).
        # NOTE(review): $FASTLOAD is unquoted below; if python fails or prints
        # something that is not an integer, the test errors out and the
        # non-fastload branch runs.
        FASTLOAD=$(python -c 'import Toggles.ProcMgrToggleLib as t; print t.toggleFastloadEnabled()')
        if [ $FASTLOAD -ne 0 ]; then
            PROCMGR=procmgr
            # if ProcMgr is already running, don't try to run it. Return success.
            # NOTE(review): 'netns -q' is presumably a query for an existing
            # "procmgr" session - confirm against the netns tool.
            netns -q $PROCMGR > /dev/null 2>&1
            RETVAL=$?
            if [ $RETVAL -ne 0 ]; then
                /usr/bin/netnsd -d -i --dlopen -p -f "" -l libLoadDynamicLibs.so $PROCMGR libProcMgrSetup.so --daemonize
                RETVAL=$?
                # On this path logEosStarted runs after the launch; RETVAL
                # already holds the netnsd status, so it is not affected.
                logEosStarted
                [ $RETVAL -eq 0 ] && touch /var/lock/subsys/ProcMgr
            fi
        else
            # if ProcMgr is already running, don't try to run it. Return success.
            pidof ProcMgr-master >& /dev/null
            RETVAL=$?
            if [ $RETVAL -ne 0 ]; then
                # On this path logEosStarted runs before the launch.
                logEosStarted
                # ProcMgr uses its own lock to guarantee single instance
                ProcMgr --daemonize
                RETVAL=$?
                [ $RETVAL -eq 0 ] && touch /var/lock/subsys/ProcMgr
            fi
        fi
    fi
    # if PPID is not 1, that means we are starting with the knowledge of systemd.
    #  In that case update systemd about it. It might seem strange that we are using
    # 'start' here instead of 'daemon-reload'. But daemon-reload is not working for
    # SYSV service. When we use 'start' systemd checks if the PID exists, and if so
    # simply loads the new PID.
    if [ $RETVAL == 0 ] && [ $PPID != 1 ];
    then
       /bin/systemctl start ProcMgr > /dev/null 2>&1
    fi
    # Report the outcome on the console and hand RETVAL back to the caller.
    if [ $RETVAL == 0 ];
    then
        success
    else
        failure
    fi
    echo
    return $RETVAL
}

# waitForProcMgrTermination: wait until neither netnsd-watcher nor
# ProcMgr-worker is running any more.
#
# Polls with pgrep once per second and gives up after 30 one-second sleeps
# (an arbitrary limit).
#
# Outputs:  a message on stdout when the limit is exceeded.
# Returns:  0 once no matching process is found,
#           2 if a matching process is still present after 30 seconds.
waitForProcMgrTermination() {
    local waited=0

    while pgrep 'netnsd-watcher|ProcMgr-worker' > /dev/null 2>&1; do
        # The timeout is only reported while a process is actually still
        # there; processes that vanish during the final sleep count as a
        # clean termination, not as a timeout.
        if [ "$waited" -ge 30 ]; then
            echo $"Wait time for process termination exceeds 30 seconds"
            return 2
        fi
        sleep 1
        waited=$((waited + 1))
    done
    return 0
}

# stop: stop ProcMgr together with the processes it manages.
#
# Requires root; a non-root caller gets a failure report and status 1.
# As root it runs the helper scripts in /etc/ProcMgr.d/scripts/stop, signals
# netnsd-watcher and ProcMgr-worker with pkill, waits for them to terminate,
# removes the lock and pid files on success, and finally clears the ProcMgr
# run directory and the backups.
#
# Globals:  RETVAL (written)
# Returns:  0 on success and also when pkill reported status 1 (no process
#           found); otherwise the pkill or wait status.
# NOTE(review): only the exit status of 'pkill ProcMgr-worker' is kept; the
# status of 'pkill netnsd-watcher' is discarded.
stop() {
    local file

    echo -n $"Stopping ProcMgr and managed processes: "
    logger "Stopping ProcMgr and managed processes"
    if [ "$UID" -ne 0 ]; then
        RETVAL=1
        failure
        echo
        return $RETVAL
    fi

    # Run stop helper scripts.
    for file in /etc/ProcMgr.d/scripts/stop/*; do
        # When the directory has no entries the glob stays unexpanded; skip
        # it instead of logging and executing a path that does not exist.
        [ -e "$file" ] || continue
        logger "Executing stop script $file"
        # Helper failures are deliberately ignored (best effort).
        "$file" >/dev/null 2>&1 || :
    done

    # Send ProcMgr a SIGTERM so it kills all its children.
    pkill netnsd-watcher
    pkill ProcMgr-worker
    RETVAL=$?
    # A failed wait overrides whatever pkill reported.
    waitForProcMgrTermination || RETVAL=$?
    if [ "$RETVAL" -eq 0 ]; then
        rm -f /var/lock/subsys/ProcMgr
        rm -f /var/run/ProcMgr.pid
        success
    else
        failure
    fi
    echo
    cleanEtcProcMgr
    # Remove all backups.  This is to preserve the behavior where Sysdb death
    # causes data loss of all agents.
    cleanBackup

    # If no process found, signal to user that we failed, but return 0.
    # Otherwise, some Launcher tests will fail
    if [ "$RETVAL" -eq 1 ]; then
        RETVAL=0
    fi
    return $RETVAL
}

# reload: send SIGHUP to the ProcMgr process named in /var/run/ProcMgr.pid.
#
# Does nothing when the pid file cannot be read, when it is empty, or when
# 'kill -0' reports that the process does not exist.
#
# Returns:  the status of 'kill -HUP' when the signal is sent, 0 otherwise.
reload() {
    local procMgrPid

    # Ignore SIGHUP in this shell from here on.
    trap "" SIGHUP

    procMgrPid=$(cat /var/run/ProcMgr.pid 2>/dev/null) || return 0
    # An empty pid file would otherwise end in 'kill -HUP' with no target.
    [ -n "$procMgrPid" ] || return 0

    # if ProcMgr does not exist, don't attempt to do a warm start, as ProcMgr
    # restart does a warm start during start up.
    if ! kill -0 "$procMgrPid" 2>&1 | grep "No such process" > /dev/null; then
        # BUG4137: Do *NOT* use "killall -HUP 'ProcMgr-worker'" here,
        #          because that introduces race conditions that could
        #          cause spurious process restarts, especially at system
        #          boot time.
        kill -HUP "$procMgrPid"
    fi
}

# cleanEtcProcMgr: delete every regular file below /etc/ProcMgr.d/run.
#
# Directories are left in place. Best effort: always returns 0.
cleanEtcProcMgr() {
    echo $"Removing all files in all subdirs of /etc/ProcMgr.d/run"
    # find hands the paths to rm itself, so names containing whitespace or
    # quotes are removed correctly instead of being split into fragments.
    find /etc/ProcMgr.d/run -type f -exec rm -f -- {} + || :
}

# cleanBackup: delete every regular file below /var/shmem/shmem/backup.
#
# Does nothing when that path does not exist. Directories are left in place.
# Best effort: returns 0.
cleanBackup() {
    if [ -e "/var/shmem/shmem/backup" ]; then
        echo $"Removing all files for backup"
        # find hands the paths to rm itself, so names containing whitespace
        # are not split into fragments. Only regular files are selected, so
        # a recursive rm is not needed.
        find /var/shmem/shmem/backup -type f -exec rm -f -- {} + || :
    fi
}

# clean: remove leftover core files, agent logs and quicktrace files, then
# clear the ProcMgr run directory and the backups.
#
# Only regular files are deleted; directories are left in place. Each removal
# is best effort.
#
# Returns:  the status of cleanBackup.
clean() {
    logger "'service ProcMgr clean' cleaning up."

    # find hands the paths to rm itself, so names containing whitespace are
    # removed correctly instead of being split into fragments.
    echo $"Removing all files in /var/core"
    find /var/core -type f -exec rm -f -- {} + || :

    echo $"Removing all files in /var/log/agents"
    find /var/log/agents -type f -exec rm -f -- {} + || :

    echo $"Removing all files in /var/log/qt"
    find /var/log/qt -type f -exec rm -f -- {} + || :

    cleanEtcProcMgr
    cleanBackup
}

# stoppm: stop ProcMgr itself while leaving its managed processes running.
#
# Requires root; a non-root caller gets a failure report and status 1.
# As root it runs the helper scripts in /etc/ProcMgr.d/scripts/stoppm,
# signals netnsd-watcher, SIGKILLs ProcMgr-master and ProcMgr-worker, removes
# the lock and pid files when the worker was killed, and waits for the
# watcher and worker to disappear.
#
# Globals:  RETVAL (written)
# Returns:  the status of 'killall -KILL ProcMgr-worker', overridden by a
#           non-zero wait status. The status of killing ProcMgr-master does
#           not influence the result.
stoppm() {
    local file

    echo -n $"Stopping ProcMgr (but not managed processes): "
    logger "Stopping ProcMgr (but not managed processes)"
    if [ "$UID" -ne 0 ]; then
        RETVAL=1
    else
        # Run stoppm helper scripts.
        for file in /etc/ProcMgr.d/scripts/stoppm/*; do
            # When the directory has no entries the glob stays unexpanded;
            # skip it instead of logging and executing a missing path.
            [ -e "$file" ] || continue
            logger "Executing stoppm script $file"
            # Helper failures are deliberately ignored (best effort).
            "$file" >/dev/null 2>&1 || :
        done

        # Shut down both the ProcMgr master and the ProcMgr worker,
        # without shutting down the managed processes. This is useful
        # when upgrading the ProcMgr itself.
        #
        # In order to prevent ProcMgr to kill its children, kill off
        # the master first, and then the worker, using SIGKILL.
        pkill netnsd-watcher

        killall -KILL 'ProcMgr-master'

        killall -KILL 'ProcMgr-worker'
        RETVAL=$?
        if [ "$RETVAL" -eq 0 ]; then
            rm -f /var/lock/subsys/ProcMgr
            rm -f /var/run/ProcMgr.pid
        fi
        # A failed wait overrides whatever killall reported.
        waitForProcMgrTermination || RETVAL=$?
    fi
    if [ "$RETVAL" -eq 0 ]; then
        success
    else
        failure
    fi
    echo
    return $RETVAL
}


# restart: stop ProcMgr and, only if that succeeded, start it again.
#
# Returns:  the status of start when stop succeeded; otherwise the status of
#           the echo that prints the abort message (normally 0).
restart() {
    if stop; then
        start
    else
        echo $"Can't successfully stop ProcMgr. Aborting ProcMgr start."
    fi
}

# cleanrestart: stop ProcMgr, wipe its leftover files, then start it again.
#
# The clean and start steps run only when stop succeeded.
#
# Returns:  the status of start when stop succeeded; otherwise the status of
#           the echo that prints the abort message (normally 0).
cleanrestart() {
    if ! stop; then
        echo $"Can't successfully stop ProcMgr. Aborting ProcMgr clean and start."
        # Bare return: hand back the status of the echo above.
        return
    fi
    clean
    start
}

# Command dispatch. Every action except condrestart and status maps directly
# onto the function of the same name defined above; the script's exit status
# is that of the selected action.
case "$1" in
start | stop | reload | clean | stoppm | restart | cleanrestart)
    "$1"
    ;;
condrestart)
    # Restart only when the subsys lock file exists.
    [ ! -f /var/lock/subsys/ProcMgr ] || restart
    ;;
status)
    status ProcMgr
    ;;
*)
    echo $"Usage: $0 {start|stop|reload|clean|stoppm|status|restart|condrestart|cleanrestart}"
    exit 1
    ;;
esac
