#!/bin/bash
# Marker file: touched by startall, checked by condrestartall.
lockfile=/var/lock/subsys/os-cgroups

# Pull in the OpenShift node settings when the file is present.
[ -f /etc/openshift/node.conf ] && . /etc/openshift/node.conf

# Pull in the cgroup tuning values (the cpu_* / memory_* settings looked up
# by the set_* functions) when the file is present.
[ -f /etc/openshift/resource_limits.conf ] && . /etc/openshift/resource_limits.conf

# Script exit status; the command dispatch at the bottom overwrites RETVAL.
RETVAL=0
GROUP_RETVAL=0

# Test if this host uses systemd or init
# Returns: 0 when the command name of PID 1 is "systemd", non-zero otherwise
is_systemd() {
    local init_comm
    init_comm=$(ps --noheaders -ocomm 1)
    # Quoted on purpose: when ps prints nothing the comparison is simply
    # false, instead of `test` complaining about a missing operand.
    [ "$init_comm" = "systemd" ]
}

#
# Fall back to built-in defaults for every setting that is still unset
# or empty at this point
#
: "${GEAR_GECOS:=OpenShift guest}"

: "${OPENSHIFT_CGROUP_ROOT:=/openshift}"
: "${OPENSHIFT_CGROUP_SUBSYSTEMS:=cpu,cpuacct,memory,net_cls,freezer}"

: "${CGROUP_RULES_FILE:=/etc/cgrules.conf}"

CPU_VARS='cfs_period_us cfs_quota_us rt_period_us rt_runtime_us shares'
MEM_VARS='limit_in_bytes memsw_limit_in_bytes soft_limit_in_bytes swappiness'

# Get a user's UID
# Arguments: $1 - login name, compared literally with the first passwd field
# Outputs:   the UID on stdout; nothing when no entry has that name
uid() {
    # Exact string comparison on the name field.  A grep pattern built from
    # the name would treat characters such as "." as regex metacharacters
    # and could return the UIDs of other accounts as well.  Appending ""
    # forces awk to compare as strings rather than as numbers.
    getent passwd | awk -F: -v name="$1" '($1 "") == (name "") { print $3; exit }'
}

# ============================================================================
#  Functions for setting the net class
# ============================================================================

#
# Build the cgroup net class id for a numeric UID
#
classid() {
    # A fixed leading "1" (the major part) followed by the UID rendered as
    # at least four zero-padded hex digits (the minor part).
    printf '0x1%04x' $1
}

# Tag a user's cgroup with the net class id derived from the user's UID
# Globals:   OPENSHIFT_CGROUP_ROOT (read)
# Arguments: $1 - user name
# Returns:   non-zero when the UID cannot be resolved or cgset fails
set_net_cls() {
    local cgpath="${OPENSHIFT_CGROUP_ROOT}/$1"
    local userid userclassid

    userid=$(uid "$1")
    if [ -z "$userid" ]
    then
        # Stop here: formatting an empty UID would tag the cgroup with a
        # class id that is not derived from any real account.
        echo "set_net_cls: cannot determine UID for '$1'" >&2
        return 1
    fi

    userclassid=$(classid "$userid")
    cgset -r "net_cls.classid=${userclassid}" "$cgpath"
}

# ==========================================================================
#  Functions for tuning the user's CPU limits in cgroups
# ==========================================================================
CPUVARS="cfs_period_us cfs_quota_us rt_period_us rt_runtime_us shares"

# Apply the configured cpu.* values to a user's cgroup
# Globals:   OPENSHIFT_CGROUP_ROOT, CPUVARS and the cpu_<name> settings (read)
# Arguments: $1 - user name
# Returns:   0 when every configured value was applied, 1 when any cgset failed
set_cpu() {
    local cgpath="${OPENSHIFT_CGROUP_ROOT}/$1"
    local varname safename ref value
    local rc=0

    for varname in $CPUVARS
    do
        # cgroups names can have periods(.)  shell varnames can't
        safename=${varname//./_}
        # Indirect expansion reads cpu_<name> without going through eval.
        ref="cpu_${safename}"
        value=${!ref}
        if [ -n "$value" ]
        then
            # TODO: get per-app increments
            # Remember a failure but keep applying the remaining values.
            cgset -r "cpu.${varname}=${value}" "$cgpath" || rc=1
        fi
    done
    return $rc
}

# ==========================================================================
#  Functions for tuning the user's memory limits in cgroups
# ==========================================================================
MEMVARS="limit_in_bytes memsw.limit_in_bytes soft_limit_in_bytes swappiness"

# Apply the configured memory.* values to a user's cgroup
# Globals:   OPENSHIFT_CGROUP_ROOT, MEMVARS and the memory_<name> settings (read)
# Arguments: $1 - user name
# Returns:   0 when every configured value was applied, 1 when any cgset failed
set_memory() {
    local cgpath="${OPENSHIFT_CGROUP_ROOT}/$1"
    local varname safename ref value
    local rc=0

    # for each var get and set the value
    for varname in $MEMVARS
    do
        # cgroups names can have periods(.)  shell varnames can't
        safename=${varname//./_}
        # Indirect expansion reads memory_<name> without going through eval.
        ref="memory_${safename}"
        value=${!ref}
        if [ -n "$value" ]
        then
            # TODO: get per-app increments
            # Remember a failure but keep applying the remaining values.
            cgset -r "memory.${varname}=${value}" "$cgpath" || rc=1
        fi
    done
    return $rc
}

# ==========================================================================
#  Functions for tuning the weighting of user's disk IO
# ==========================================================================
BLKIOVARS="weight weight_device"

# Placeholder for blkio tuning: looks up each blkio_<name> setting but does
# not apply anything, because the cgset call below is still commented out.
set_blkio() {
    # $1 is the user name
    CGPATH="${OPENSHIFT_CGROUP_ROOT}/$1"

    # walk the known settings and resolve the value of each one
    for VARNAME in $BLKIOVARS; do
        # shell variable names cannot hold the periods cgroup names may have
        SAFENAME=$(echo $VARNAME | tr . _)
        VALUE=$(eval echo \$blkio_$SAFENAME)
        [ -n "${VALUE}" ] || continue

        # TODO: get per-app increments
        # TODO: weight_device should really use the user's home device
        #       and set the rest (if any) to 0
        # cgset -r "blkio.$VARNAME=$VALUE" $CGPATH
        :
    done
}

#
# Set the freezer state of a user's cgroup to FROZEN
#
freeze_user() {
    local gear=$1
    cgset -r freezer.state=FROZEN "${OPENSHIFT_CGROUP_ROOT}/${gear}"
}

#
# Set the freezer state of a user's cgroup back to THAWED
#
thaw_user() {
    local gear=$1
    cgset -r freezer.state=THAWED "${OPENSHIFT_CGROUP_ROOT}/${gear}"
}

# Print the login name of every passwd entry whose line contains the
# GEAR_GECOS tag between two colons
#
openshift_users() {
    local gecos_filter=":${GEAR_GECOS}:"
    getent passwd \
        | grep "${gecos_filter}" \
        | cut -d: -f1
}

# Check that a name exists and is tagged as an OpenShift guest user
# Arguments: $1 - user name, compared literally
# Returns:   0 when openshift_users lists exactly that name, non-zero otherwise
valid_user() {
    # -x -F: whole-line, fixed-string comparison, so regex metacharacters in
    # the name (for example ".") cannot make it match a different user.
    openshift_users | grep -q -x -F -e "$1" 2>/dev/null
}

#
# Succeed when lscgroup prints at least one line for the user's cgroup
#
cgroup_exists() {
    # $1 is the user name
    local target="${OPENSHIFT_CGROUP_SUBSYSTEMS}:${OPENSHIFT_CGROUP_ROOT}/$1"
    local line_count
    line_count=$(lscgroup "$target" | wc -l)
    [ "$line_count" != "0" ]
}

#
# Create the cgroup for an openshift user in every configured subsystem
#
# Called without an argument the group path is the OpenShift root itself,
# which is how startall creates the root group.
#
add_cgroup() {
    local owner="$1:$1"
    local group_spec="${OPENSHIFT_CGROUP_SUBSYSTEMS}:${OPENSHIFT_CGROUP_ROOT}/$1"
    cgcreate -t "$owner" -g "$group_spec"
}

#
# Remove the cgroup of an openshift user from every configured subsystem
#
# Called without an argument the group path is the OpenShift root itself,
# which is how stopall removes the root group.
#
delete_cgroup() {
    local group_spec="${OPENSHIFT_CGROUP_SUBSYSTEMS}:${OPENSHIFT_CGROUP_ROOT}/$1"
    cgdelete "$group_spec"
}


#
# Print the subsystem field of every lscgroup line whose path is exactly
# the user's cgroup
#
cgroup_user_subsystems() {
    # $1 is the user name; the trailing "$" anchors the match at end of line
    local path_suffix=":${OPENSHIFT_CGROUP_ROOT}/$1\$"
    lscgroup | grep "$path_suffix" | cut -d: -f1
}

#
# Check that a group binding rule exists for a user
#
# Globals:   CGROUP_RULES_FILE (read)
# Arguments: $1 - user name, compared literally with the first field
# Returns:   0 when a non-comment line has that first field, non-zero otherwise
#
cgroup_rule_exists() {
    # An empty name can never be bound to anything.
    [ -n "$1" ] || return 1

    # awk splits on any run of blanks, so rules separated by spaces are found
    # as well as the tab-separated ones; this matches the "\s" that
    # delete_cgroup_rule uses.  Appending "" forces a string comparison.
    awk -v user="$1" '
        /^#/ { next }
        ($1 "") == (user "") { found = 1; exit }
        END { exit !found }
    ' "${CGROUP_RULES_FILE}"
}


#
# Bind the user to the cgroup by appending one rule line to the rules file:
# user name, subsystem list and cgroup path, separated by tabs
#
add_cgroup_rule() {
    # $1 is the user name
    printf '%s\t%s\t%s\n' \
        "$1" \
        "$OPENSHIFT_CGROUP_SUBSYSTEMS" \
        "$OPENSHIFT_CGROUP_ROOT/$1" >>"${CGROUP_RULES_FILE}"
}

#
# Unbind the user from any cgroup: drop every rule line that starts with
# the user name followed by whitespace
#
# Globals:   CGROUP_RULES_FILE (edited in place)
# Arguments: $1 - user name, matched literally
# Returns:   1 for an empty name, otherwise the exit status of sed
#
delete_cgroup_rule() {
    local escaped

    if [ -z "$1" ]
    then
        # An empty name would turn the address into /^\s/ and delete every
        # line that merely starts with whitespace.
        echo "delete_cgroup_rule: missing user name" >&2
        return 1
    fi

    # Backslash-escape the characters that are special in a sed address, so
    # a name such as "a.c" cannot also delete the rule for "abc".
    escaped=$(printf '%s' "$1" | sed -e 's|[][\.*^$/]|\\&|g')
    sed -i -e "/^${escaped}\s/d" "${CGROUP_RULES_FILE}"
}

#
# Move every process the user currently owns into the user's cgroup
#
collect_tasks() {
    # $1 is the user name
    local owner_uid
    owner_uid=$(id -u $1)
    CGPATH="${OPENSHIFT_CGROUP_ROOT}/$1"

    # classify each PID that ps lists for that UID, one at a time
    for PID in $(ps -opid= -u $owner_uid); do
        cgclassify -g "${OPENSHIFT_CGROUP_SUBSYSTEMS}:${CGPATH}" $PID
    done
}

# Set up the cgroup, limits, binding rule and task membership for one user.
# Prints a one-line progress report and returns 0 on success, 1 when creating
# the cgroup or adding its rule failed.
startuser() {
    local gear=$1
    local failed=0

    echo -n "starting cgroups for ${gear}..."

    # only these two steps decide between OK and FAILED
    add_cgroup "$gear" || failed=1

    # tuning steps: their status is not looked at
    set_cpu "$gear"
    set_memory "$gear"
    #set_blkio $gear
    set_net_cls "$gear"

    # CHECK: don't trust old rules
    if cgroup_rule_exists "$gear"; then
        delete_cgroup_rule "$gear"
    fi

    add_cgroup_rule "$gear" || failed=1

    collect_tasks "$gear"

    if (( failed )); then
        echo -n " [FAILED] "
    else
        echo -n " [OK] "
    fi
    echo
    return $failed
}


# Create the OpenShift root cgroup and start cgroups for every guest user.
#
# On non-systemd hosts the cgconfig service must be running; when it is not,
# one start attempt is made and the service is checked again.
#
# Globals:  lockfile (touched on success)
# Returns:  3 when cgconfig is still not running after the start attempt,
#           the add_cgroup status when the root group cannot be created,
#           otherwise the sum of the startuser return values (0 = all fine)
startall() {
    echo "Initializing Openshift guest control groups: "

    local _retval=0
    local _group_retval=0
    local _user

    # Hosts running an OS based on systemd already have cgroups running
    if ! is_systemd
    then

        if ! service cgconfig status >/dev/null
        then
            echo "cgconfig service not running. attempting to start it"
            service cgconfig start
            # No return here: the check below decides whether the start
            # attempt worked, so a successful start lets startall continue.
        fi

        if ! service cgconfig status >/dev/null
        then
            echo "cgconfig service not running."
            return 3
        fi

    fi

    # create the root of the openshift user control group
    add_cgroup # defaults to creating the root group
    _retval=$?
    [ $_retval -eq 0 ] || return $_retval

    # This won't scale forever, but works fine in the '100 or so' range
    for _user in $(openshift_users)
    do
        startuser "$_user"
        (( _group_retval += $? ))
    done

    # kick the Cgroups rules daemon
    #service cgred reload
    pkill -USR2 cgrulesengd

    if [ $_group_retval -eq 0 ]
    then
        touch "${lockfile}"
        echo -n "[ OK ]"
    else
        echo -n "[ FAILED ]"
    fi

    echo -n "Openshift cgroups initialized"
    echo
    echo
    echo "WARNING !!! WARNING !!! WARNING !!!"
    echo "Cgroups may have just restarted.  It's important to confirm all the openshift apps are actively running."
    echo "It's suggested you run service openshift restart now"
    echo "WARNING !!! WARNING !!! WARNING !!!"
    echo
    return $_group_retval
}

# Tear down the cgroup and the binding rule of one user.
# Prints a one-line progress report and returns 0 on success (including the
# case where no cgroup exists), 1 when removing the cgroup or rule failed.
stopuser() {
    local gear=$1
    local failed=0

    echo -n "stopping cgroups for ${gear}..."

    # killing the user's processes is intentionally left disabled:
    #pkill -u $(id -u $gear)

    if ! cgroup_exists "$gear"; then
        echo -n " cgroup already stopped"
    else
        # drop the cgroup first, then the rule that binds the user to it
        delete_cgroup "$gear" || failed=1
        delete_cgroup_rule "$gear" || failed=1
    fi

    if (( failed )); then
        echo -n " [FAILED] "
    else
        echo -n " [OK] "
    fi
    echo
    return $failed
}

# Stop the cgroups of every guest user and remove the OpenShift root cgroup.
#
# Globals:  lockfile (removed when everything stopped cleanly)
# Returns:  3 when cgconfig is not running on a non-systemd host,
#           otherwise the sum of the stopuser return values (0 = all fine)
stopall() {
    echo "Removing Openshift guest control groups: "

    local _group_retval=0
    local _user

    if ! is_systemd
    then
        if ! service cgconfig status >/dev/null
        then
            echo "cgconfig service not running"
            return 3
        fi
    fi

    # This won't scale forever, but works fine in the '100 or so' range
    for _user in $(openshift_users)
    do
        stopuser "$_user"
        (( _group_retval += $? ))
    done

    # notify the cgroup rule daemon
    #service cgred reload
    pkill -USR2 cgrulesengd

    # remove the openshift root cgroup
    # NOTE(review): the status of this call has never been folded into the
    # result; kept that way so stopping an already-stopped setup still succeeds.
    delete_cgroup

    if [ $_group_retval -eq 0 ]
    then
        echo -n "[ OK ]"
        # The lockfile marks "cgroups are up" for condrestartall, so a clean
        # stop has to remove it rather than touch it.
        rm -f "${lockfile}"
    else
        echo -n "[ FAILED ]"
    fi

    echo -n "Openshift cgroups uninitialized"
    echo
    return $_group_retval
}

# Run stopall followed by startall (startall runs even when stopall failed)
# and return the sum of their two exit statuses.
restartall() {
    local stop_rc start_rc

    stopall
    stop_rc=$?
    startall
    start_rc=$?

    return $(( stop_rc + start_rc ))
}

# Report whether the OpenShift root cgroup exists and, per user, whether a
# binding rule exists and which subsystems hold a cgroup for that user.
#
# Arguments: $1 - optional user name (list); defaults to all guest users
# Returns:   1 when the root cgroup is missing, otherwise the number of
#            users without a binding rule (0 = all bound)
status() {
    echo "Checking Openshift Services: "

    local _retval=0
    local _group_retval=0
    local _userlist _user _subsystems _bound

    # check that the /openshift cgroup exists
    if lscgroup | grep -e  ":${OPENSHIFT_CGROUP_ROOT}\$" >/dev/null 2>&1
    then
        echo "Openshift cgroups initialized"
    else
        echo "Openshift cgroups uninitialized"
        echo
        return 1
    fi

    if [ -z "$1" ]
    then
        _userlist=$(openshift_users)
    else
        _userlist=$1
    fi

    # This won't scale forever, but works fine in the '100 or so' range
    #  would be easy to convert to a 'in `find...`'     jj
    for _user in $_userlist
    do
        # The user name must be passed on; without it the lookup targets
        # the bare root path and the subsystem list is always empty.
        _subsystems=$(cgroup_user_subsystems "$_user")

        # check that cgrule exists
        if cgroup_rule_exists "$_user"
        then
            _retval=0
            _bound="BOUND"
        else
            _retval=1
            _bound="UNBOUND"
        fi

        # Both expansions are unquoted on purpose: the newline-separated
        # list is joined with commas, and an empty list adds no extra word.
        echo -n "${_user}: $_bound    " $(echo $_subsystems | tr ' ' ,)

        if [ $_retval -eq 0 ]
        then
            echo -n "[ OK ]"
        else
            (( _group_retval += 1 ))
            echo -n "[ FAILED ]"
        fi
        echo
    done
    return $_group_retval
}

#
# Start cgroups for every user (or just the specified user) that does not
# have a cgroup yet; returns the sum of the startuser return values
#
repair() {
    local failures=0

    # an explicit argument wins; otherwise walk the full guest user list
    USERLIST=${1:-$(openshift_users)}

    for USERNAME in $USERLIST; do
        # users that already have a cgroup are left untouched
        cgroup_exists "$USERNAME" && continue

        startuser "$USERNAME"
        (( failures += $? ))
    done
    return $failures
}

# Print the command-line synopsis.
usage() {
    echo "Usage: $0 {startall|stopall|restartall|repair [<username>]|condrestartall|status [<username>]|startuser <username>|stopuser <username>|freezeuser <username>|thawuser <username>}"
}

# Abort with the synopsis when a per-user action was given no user name.
# With an empty name the per-user helpers would build the path of the
# OpenShift root cgroup itself and act on that.
require_username() {
    if [ -z "$1" ]
    then
        usage
        exit 1
    fi
}

# Dispatch on the requested action; RETVAL becomes the script's exit status.
case "$1" in
    startall)
        startall
        RETVAL=$?
        ;;

    stopall)
        stopall
        RETVAL=$?
        ;;

    restartall)
        restartall
        RETVAL=$?
        ;;

    repair)
        repair "$2"
        RETVAL=$?
        ;;

    condrestartall)
        # Restart only when the lockfile says the cgroups were started.
        # Having nothing to restart counts as success, not as a failure.
        RETVAL=0
        if [ -f "$lockfile" ]
        then
            restartall
            RETVAL=$?
        fi
        ;;

    status)
        status "$2"
        RETVAL=$?
        ;;

    startuser)
        require_username "$2"
        if is_systemd || service cgconfig status >/dev/null 2>&1
        then
            startuser "$2"
            RETVAL=$?
            # make the rules daemon pick up the changed rules file
            pkill -USR2 cgrulesengd
        else
            RETVAL=1
            echo "cgconfig service is not running"
        fi
        ;;

    stopuser)
        require_username "$2"
        if is_systemd || service cgconfig status >/dev/null 2>&1
        then
            stopuser "$2"
            RETVAL=$?
            # make the rules daemon pick up the changed rules file
            pkill -USR2 cgrulesengd
        else
            RETVAL=1
            echo "cgconfig service is not running"
        fi
        ;;

    freezeuser)
        require_username "$2"
        freeze_user "$2"
        RETVAL=$?
        ;;

    thawuser)
        require_username "$2"
        thaw_user "$2"
        RETVAL=$?
        ;;

    *)
        usage
        exit 1
        ;;
esac

exit $RETVAL
