#!/bin/bash
# Marker file; condrestartall only restarts when this file exists.
lockfile=/var/lock/subsys/os-cgroups

# Pull in the OpenShift node configuration, when the file is present.
[ -f /etc/openshift/node.conf ] && . /etc/openshift/node.conf

# Pull in the resource limit tuning values for cgroups, when present.
[ -f /etc/openshift/resource_limits.conf ] && . /etc/openshift/resource_limits.conf

# RETVAL becomes the exit status of the script (set by the command
# dispatch at the bottom of the file).
RETVAL=0
GROUP_RETVAL=0

# Test if this host uses systemd or init.
# Returns 0 when the command name of PID 1 is "systemd", non-zero
# otherwise (including when ps prints nothing).
function is_systemd() {
    local init_comm
    init_comm=$(ps --noheaders -ocomm 1)
    # Quoted so that an empty result cannot break the test expression.
    [ "$init_comm" = "systemd" ]
}

#
# Fall back to built-in defaults for every setting the sourced
# configuration left unset or empty
#
: "${GEAR_GECOS:=OpenShift guest}"

: "${OPENSHIFT_CGROUP_ROOT:=/openshift}"
: "${OPENSHIFT_CGROUP_SUBSYSTEMS:=cpu,cpuacct,memory,net_cls,freezer}"

: "${CGROUP_RULES_FILE:=/etc/cgrules.conf}"

# NOTE(review): the tuning functions in this file iterate CPUVARS / MEMVARS,
# not the two lists below -- confirm whether anything still reads these.
CPU_VARS="cfs_period_us cfs_quota_us rt_period_us rt_runtime_us shares"
MEM_VARS="limit_in_bytes memsw_limit_in_bytes soft_limit_in_bytes swappiness"

# Get a user's UID
# Arguments: $1 - user name
# Outputs:   the UID field of the passwd entry whose name is exactly $1;
#            nothing when there is no such entry
function uid() {
    # USERNAME=$1
    # The name is compared as a literal string, never as a regular
    # expression.  Appending "" forces awk to compare strings, so names that
    # look like numbers (e.g. "1e2") are not compared numerically.
    getent passwd | awk -F: -v name="$1" '($1 "") == (name "") { print $3 }'
}

# ============================================================================
#  Functions for setting the net class
# ============================================================================

#
# Convert a numeric UID into a cgroup net class id
#
# Arguments: $1 - UID as a decimal number
# Outputs:   "0x1" followed by the UID in hex, zero-padded to four digits
# Returns:   1 (with nothing on stdout) when $1 is empty or not a decimal
#            number, instead of silently producing the id for UID 0
#
function classid() {
    # major: 1, minor UID
    case "$1" in
        ''|*[!0-9]*)
            echo "classid: invalid UID '$1'" >&2
            return 1
            ;;
    esac
    # NOTE(review): a UID above 65535 needs more than four hex digits, so the
    # result is then longer than "0x1" + 4 digits -- confirm the UID range.
    printf "0x1%04x" "$1"
}

# Set net_cls.classid on a user's cgroup to the class id derived from the
# user's UID.
# Globals:   OPENSHIFT_CGROUP_ROOT (read)
# Arguments: $1 - user name
# Returns:   1 when the UID or the class id cannot be determined (nothing
#            is changed in that case), otherwise the status of cgset
function set_net_cls() {
    # USERNAME=$1
    local cgpath=${OPENSHIFT_CGROUP_ROOT}/$1
    local userid userclassid

    userid=$(uid "$1")
    if [ -z "$userid" ]
    then
        echo "set_net_cls: no UID found for user '$1'" >&2
        return 1
    fi

    userclassid=$(classid "$userid") || return 1
    cgset -r "net_cls.classid=$userclassid" "$cgpath"
}

# ==========================================================================
#  Functions for tuning the user's CPU limits in cgroups
# ==========================================================================
CPUVARS="cfs_period_us cfs_quota_us rt_period_us rt_runtime_us shares"
# Apply the cpu.* tunables to a user's cgroup.
# For every name in CPUVARS the shell variable cpu_<name> is looked up
# (presumably set by the sourced resource limits file); names whose
# variable is unset or empty are skipped.
# Globals:   OPENSHIFT_CGROUP_ROOT, CPUVARS, cpu_* (read)
# Arguments: $1 - user name
# Returns:   0 when every attempted cgset succeeded, 1 otherwise
function set_cpu() {
    # USERNAME=$1
    local cgpath=${OPENSHIFT_CGROUP_ROOT}/$1
    local varname safename value rc=0

    for varname in $CPUVARS
    do
        # cgroups names can have periods(.)  shell varnames can't
        safename=cpu_${varname//./_}
        # Indirect expansion reads the variable without eval, so the value
        # is taken verbatim (no word splitting, no globbing).
        value=${!safename}
        if [ -n "${value}" ]
        then
            # TODO: get per-app increments
            cgset -r "cpu.$varname=$value" "$cgpath" || rc=1
        fi
    done
    return $rc
}

# ==========================================================================
#  Functions for tuning the user's memory limits in cgroups
# ==========================================================================
MEMVARS="limit_in_bytes memsw.limit_in_bytes soft_limit_in_bytes swappiness"
# Apply the memory.* tunables to a user's cgroup.
# For every name in MEMVARS the shell variable memory_<name> (periods
# replaced by underscores) is looked up; names whose variable is unset or
# empty are skipped.
# Globals:   OPENSHIFT_CGROUP_ROOT, MEMVARS, memory_* (read)
# Arguments: $1 - user name
# Returns:   0 when every attempted cgset succeeded, 1 otherwise
function set_memory() {
    # USERNAME=$1
    local cgpath=${OPENSHIFT_CGROUP_ROOT}/$1
    local varname safename value rc=0

    # for each var get and set the value
    for varname in $MEMVARS
    do
        # cgroups names can have periods(.)  shell varnames can't
        safename=memory_${varname//./_}
        # Indirect expansion reads the variable without eval, so the value
        # is taken verbatim (no word splitting, no globbing).
        value=${!safename}
        if [ -n "${value}" ]
        then
            # TODO: get per-app increments
            cgset -r "memory.$varname=$value" "$cgpath" || rc=1
        fi
    done
    return $rc
}

# ==========================================================================
#  Functions for tuning the weighting of user's disk IO
# ==========================================================================
BLKIOVARS="weight weight_device"
# Placeholder for blkio tuning: looks up blkio_<name> for every name in
# BLKIOVARS, but the cgset call is still commented out, so nothing is
# applied to the cgroup.
function set_blkio() {
    # USERNAME=$1
    CGPATH="${OPENSHIFT_CGROUP_ROOT}/$1"

    # walk the tunables one by one
    for VARNAME in $BLKIOVARS
    do
        # cgroup names may contain periods(.), shell variable names may not
        SAFENAME=$(echo $VARNAME | tr . _)
        VALUE=$(eval echo \$blkio_$SAFENAME)
        [ -z "${VALUE}" ] && continue

        # TODO: get per-app increments
        # TODO: weight_device should really use the user's home device
        #       and set the rest (if any) to 0
        # cgset -r "blkio.$VARNAME=$VALUE" $CGPATH
        :
    done
}

#
# Freeze a user
#
# Sets freezer.state=FROZEN on the user's cgroup.
# Arguments: $1 - user name (required)
# Returns:   1 when no user name is given, otherwise the status of cgset
#
freeze_user() {
    # USERNAME=$1
    if [ -z "$1" ]
    then
        # An empty name would address ${OPENSHIFT_CGROUP_ROOT}/ itself, the
        # group under which every user's cgroup is created.
        echo "freeze_user: user name required" >&2
        return 1
    fi
    cgset -r freezer.state=FROZEN "${OPENSHIFT_CGROUP_ROOT}/$1"
}

#
# Thaw a user
#
# Sets freezer.state=THAWED on the user's cgroup.
# Arguments: $1 - user name (required)
# Returns:   1 when no user name is given, otherwise the status of cgset
#
thaw_user() {
    # USERNAME=$1
    if [ -z "$1" ]
    then
        # An empty name would address ${OPENSHIFT_CGROUP_ROOT}/ itself, the
        # group under which every user's cgroup is created.
        echo "thaw_user: user name required" >&2
        return 1
    fi
    cgset -r freezer.state=THAWED "${OPENSHIFT_CGROUP_ROOT}/$1"
}

# List the openshift guest users
#
# Globals: GEAR_GECOS (read)
# Outputs: one user name per line, for every passwd entry whose GECOS
#          (fifth) field is exactly GEAR_GECOS
#
openshift_users() {
    # Only the GECOS field is compared, as a literal string: GEAR_GECOS is
    # not treated as a regular expression and cannot match another field.
    getent passwd | awk -F: -v gecos="$GEAR_GECOS" '($5 "") == (gecos "") { print $1 }'
}

# Arguments: $1 - user name
# Returns:   0 when $1 is listed by openshift_users, non-zero otherwise
valid_user() {
    # check if the user name exists and is tagged as a openshift guest user
    # -x -F: the name must equal a whole line literally; it is not a regex
    openshift_users | grep -x -F -e "$1" >/dev/null 2>&1
}

#
# Report whether lscgroup lists anything for a user's cgroup
# (returns 0 when it prints at least one line, non-zero otherwise)
#
cgroup_exists() {
    # USERNAME=$1
    local listed
    listed=$(lscgroup ${OPENSHIFT_CGROUP_SUBSYSTEMS}:${OPENSHIFT_CGROUP_ROOT}/$1 | wc -l)
    [ "$listed" != "0" ]
}

#
# Create a new openshift user cgroup
#
# Globals:   OPENSHIFT_CGROUP_SUBSYSTEMS, OPENSHIFT_CGROUP_ROOT (read)
# Arguments: $1 - user name; when omitted the path is
#            ${OPENSHIFT_CGROUP_ROOT}/ itself (startall relies on this to
#            create the root group)
# Returns:   the status of cgcreate
#
add_cgroup() {
    # USERNAME=$1
    # Quoted so each option value reaches cgcreate as exactly one argument.
    cgcreate -t "$1:$1" -g "${OPENSHIFT_CGROUP_SUBSYSTEMS}:${OPENSHIFT_CGROUP_ROOT}/$1"
}

#
# Delete a openshift user cgroup
#
# Globals:   OPENSHIFT_CGROUP_SUBSYSTEMS, OPENSHIFT_CGROUP_ROOT (read)
# Arguments: $1 - user name; when omitted the path is
#            ${OPENSHIFT_CGROUP_ROOT}/ itself (stopall relies on this to
#            remove the root group)
# Returns:   the status of cgdelete
#
delete_cgroup() {
    # USERNAME=$1
    # Quoted so the group spec reaches cgdelete as exactly one argument.
    cgdelete "${OPENSHIFT_CGROUP_SUBSYSTEMS}:${OPENSHIFT_CGROUP_ROOT}/$1"
}


#
# Print the subsystem field (text before the first colon) of every
# lscgroup line that ends in the user's cgroup path
#
cgroup_user_subsystems() {
    # USERNAME=$1
    local suffix=":${OPENSHIFT_CGROUP_ROOT}/$1\$"
    lscgroup | grep "$suffix" | cut -d: -f1
}

#
# Check that a group binding rule exists for a user
#
# Globals:   CGROUP_RULES_FILE (read)
# Arguments: $1 - user name
# Returns:   0 when a non-comment line of the rules file has $1 as its
#            first whitespace-separated field, non-zero otherwise
#
cgroup_rule_exists() {
    #USERNAME=$1
    # An empty name can never have a rule.
    [ -n "$1" ] || return 1

    # Skip comment lines and compare the first field as a literal string
    # (appending "" forces a string comparison in awk).  Fields may be
    # separated by any whitespace, which matches what delete_cgroup_rule
    # removes.
    awk -v name="$1" '
        /^#/ { next }
        ($1 "") == (name "") { found = 1; exit }
        END { exit !found }
    ' "${CGROUP_RULES_FILE}"
}


#
# Bind the user to the cgroup: append a rule line to the rules file
# (/etc/cgrules.conf by default).  The rules daemon is not signalled here;
# callers do that themselves.
#
# Globals:   CGROUP_RULES_FILE, OPENSHIFT_CGROUP_SUBSYSTEMS,
#            OPENSHIFT_CGROUP_ROOT (read)
# Arguments: $1 - user name (required)
# Returns:   1 when no user name is given, otherwise the status of the
#            append
#
add_cgroup_rule() {
    # USERNAME=$1
    if [ -z "$1" ]
    then
        # An empty name would append a rule line with an empty user field.
        echo "add_cgroup_rule: user name required" >&2
        return 1
    fi
    # <user> TAB <subsystems> TAB <cgroup path>
    printf '%s\t%s\t%s\n' "$1" "$OPENSHIFT_CGROUP_SUBSYSTEMS" "$OPENSHIFT_CGROUP_ROOT/$1" \
        >>"${CGROUP_RULES_FILE}"
}

#
# Unbind the user from any cgroup: delete every rules-file line that starts
# with the user name followed by whitespace
#
# Globals:   CGROUP_RULES_FILE (edited in place)
# Arguments: $1 - user name (required)
# Returns:   1 when no user name is given, otherwise the status of sed
#
delete_cgroup_rule() {
    # USERNAME=$1
    local pattern

    if [ -z "$1" ]
    then
        # An empty name would turn the expression into /^<whitespace>/d and
        # delete every line that starts with whitespace.
        echo "delete_cgroup_rule: user name required" >&2
        return 1
    fi

    # Escape regex metacharacters and the "/" delimiter so the name only
    # ever matches itself.
    pattern=$(printf '%s' "$1" | sed -e 's|[][\.*^$/]|\\&|g')
    sed -i -e "/^${pattern}[[:space:]]/d" "${CGROUP_RULES_FILE}"
}

#
# Add the user's processes to the new group
#
# Globals:   OPENSHIFT_CGROUP_SUBSYSTEMS, OPENSHIFT_CGROUP_ROOT (read)
# Arguments: $1 - user name
# Returns:   1 when the user's UID cannot be determined; otherwise the
#            status of the last cgclassify (0 when there are no processes)
#
collect_tasks() {
    # USERNAME=$1
    local cgpath=${OPENSHIFT_CGROUP_ROOT}/$1
    local userid pid

    # Stop when the lookup fails.  The name is quoted because "id -u" with
    # no argument reports the UID of the calling user instead, whose
    # processes would then be moved into the group.
    userid=$(id -u "$1") || return 1

    # add existing processes to the group
    for pid in $(ps -opid= -u "$userid")
    do
        cgclassify -g "${OPENSHIFT_CGROUP_SUBSYSTEMS}:${cgpath}" "$pid"
    done
}

#
# Set up cgroup confinement for one user: create the cgroup, apply the
# cpu / memory / net_cls settings, replace the user's binding rule and
# move the user's running processes into the group.
#
# Arguments: $1 - user name (required)
# Returns:   0 on success; 1 when no user name is given or when creating
#            the cgroup or adding the rule failed.  Failures of the tuning
#            steps and of collect_tasks do not affect the result.
#
startuser() {
    local _newuser=$1
    local _retval=0

    if [ -z "$_newuser" ]
    then
        # With an empty name the helpers below would operate on the root
        # group ${OPENSHIFT_CGROUP_ROOT}/ instead of a user's group.
        echo "startuser: user name required" >&2
        return 1
    fi

    add_cgroup "$_newuser" || _retval=1

    set_cpu "$_newuser"
    set_memory "$_newuser"
    #set_blkio $_newuser
    set_net_cls "$_newuser"

    # CHECK: don't trust old rules
    if cgroup_rule_exists "$_newuser"
    then
        delete_cgroup_rule "$_newuser"
    fi

    add_cgroup_rule "$_newuser" || _retval=1

    collect_tasks "$_newuser"

    return $_retval
}


#
# Start cgroup confinement for every openshift guest user.
#
# On hosts that are not running systemd, cgconfig is started first when it
# is not already running.  Then the root cgroup is created, startuser is
# run for every user listed by openshift_users, and the rules daemon is
# signalled.
#
# Globals:   lockfile (read; the file is touched on full success)
# Returns:   0 on success; 3 when cgconfig is not running and cannot be
#            started; the status of add_cgroup when the root group cannot
#            be created; otherwise the number of users for which startuser
#            failed (capped at 255)
#
startall() {
    local _retval=0
    local _group_retval=0
    local _username

    # Hosts running an OS based on systemd already have cgroups running
    if ! is_systemd
    then
        if ! service cgconfig status >/dev/null
        then
            # Not running: try to bring it up and carry on only when the
            # second status check confirms that it is.
            service cgconfig start
            if ! service cgconfig status >/dev/null
            then
                return 3
            fi
        fi
    fi

    # create the root of the openshift user control group
    add_cgroup # defaults to creating the root group
    _retval=$?
    [ $_retval -eq 0 ] || return $_retval

    # This won't scale forever, but works fine in the '100 or so' range
    for _username in $(openshift_users)
    do
        startuser "$_username" || (( _group_retval += 1 ))
    done

    # kick the Cgroups rules daemon
    #service cgred reload
    pkill -USR2 cgrulesengd

    # An exit status is taken modulo 256; cap the count so that a large
    # number of failures can never read as success.
    [ $_group_retval -le 255 ] || _group_retval=255

    [ $_group_retval -eq 0 ] && touch "${lockfile}"

    return $_group_retval
}

#
# Remove cgroup confinement for one user: when the user's cgroup exists,
# delete it and delete the user's binding rule.
#
# Arguments: $1 - user name (required)
# Returns:   0 on success or when the user has no cgroup; 1 when no user
#            name is given or when deleting the cgroup or the rule failed
#
stopuser() {
    local _deluser=$1
    local _retval=0

    if [ -z "$_deluser" ]
    then
        # With an empty name the helpers below would address the root group
        # ${OPENSHIFT_CGROUP_ROOT}/ and delete it.
        echo "stopuser: user name required" >&2
        return 1
    fi

    # kill any processes owned by these users
    #pkill -u $(id -u $_deluser)

    # remove the user's cgroup
    if cgroup_exists "$_deluser"
    then
        delete_cgroup "$_deluser" || _retval=1

        # remove the user's cgroup binding rule
        delete_cgroup_rule "$_deluser" || _retval=1
    fi

    return $_retval
}

#
# Stop cgroup confinement for every openshift guest user.
#
# Runs stopuser for every user listed by openshift_users, signals the rules
# daemon and removes the root cgroup.
#
# Globals:   lockfile (read; the file is removed on full success)
# Returns:   0 on success; 3 when the host is not running systemd and
#            cgconfig is not running; otherwise the number of users for
#            which stopuser failed (capped at 255)
#
stopall() {
    local _group_retval=0
    local _username

    if ! is_systemd
    then
        if ! service cgconfig status >/dev/null
        then
            return 3
        fi
    fi

    # This won't scale forever, but works fine in the '100 or so' range
    for _username in $(openshift_users)
    do
        stopuser "$_username" || (( _group_retval += 1 ))
    done

    # notify the cgroup rule daemon
    #service cgred reload
    pkill -USR2 cgrulesengd

    # remove the openshift root cgroup
    delete_cgroup

    # An exit status is taken modulo 256; cap the count so that a large
    # number of failures can never read as success.
    [ $_group_retval -le 255 ] || _group_retval=255

    # Stopped cleanly: remove the marker that startall creates, so that
    # condrestartall no longer sees the service as started.
    [ $_group_retval -eq 0 ] && rm -f "${lockfile}"
    return $_group_retval
}

# Run stopall followed by startall; startall runs even when stopall fails.
# Returns the sum of the two exit statuses.
restartall() {
    local _stop_status _start_status

    stopall
    _stop_status=$?

    startall
    _start_status=$?

    return $(( _stop_status + _start_status ))
}

#
# Report whether the openshift root cgroup exists and count the users that
# have no binding rule.
#
# Arguments: $1 - optional user name; defaults to all openshift_users
# Outputs:   one line saying whether the openshift cgroups are initialized
# Returns:   1 when the root cgroup is not listed by lscgroup; otherwise
#            the number of checked users without a binding rule (capped at
#            255), i.e. 0 when every user is bound
#
status() {
    local _group_retval=0
    local _userlist _username _subsystems _bound

    if lscgroup | grep -e  ":${OPENSHIFT_CGROUP_ROOT}\$" >/dev/null 2>&1
    then
        echo "Openshift cgroups initialized"
    else
        echo "Openshift cgroups uninitialized"
        echo
        return 1
    fi

    if [ -z "$1" ]
    then
        _userlist=$(openshift_users)
    else
        _userlist=$1
    fi

    # This won't scale forever, but works fine in the '100 or so' range
    #  would be easy to convert to a 'in `find...`'     jj
    for _username in $_userlist
    do
        # check that /openshift/<username> exists
        # The user name has to be passed: without it the lookup is done for
        # the path ${OPENSHIFT_CGROUP_ROOT}/ rather than the user's group.
        # NOTE(review): _subsystems and _bound are collected but not
        # reported anywhere yet -- confirm whether per-user output is wanted.
        _subsystems=$(cgroup_user_subsystems "$_username")
        if cgroup_rule_exists "$_username"
        then
            _bound="BOUND"
        else
            _bound="UNBOUND"
            (( _group_retval += 1 ))
        fi
    done

    # An exit status is taken modulo 256; cap the count so that a large
    # number of unbound users can never read as success.
    [ $_group_retval -le 255 ] || _group_retval=255
    return $_group_retval
}

#
# Find all users (or the specified user) which don't have cgroup
# containers and create containers for them
#
# Arguments: $1 - optional user name; defaults to all openshift_users
# Returns:   the number of users for which startuser failed (capped at
#            255), i.e. 0 when nothing failed
#
repair() {
    local _group_retval=0
    local _userlist _username

    # Iterate over the user-supplied list, or default to the list of
    # all users
    if [ -z "$1" ]
    then
        _userlist=$(openshift_users)
    else
        _userlist=$1
    fi

    for _username in $_userlist
    do
        if ! cgroup_exists "$_username"
        then
            startuser "$_username"
            (( _group_retval += $? ))
        fi
    done

    # An exit status is taken modulo 256; without the cap exactly 256
    # failures would be reported as success.
    [ $_group_retval -le 255 ] || _group_retval=255
    return $_group_retval
}

#
# Command dispatch: $1 selects the action, $2 is the optional or required
# user name for the per-user actions.  The script exits with the status of
# the selected action; an unknown action prints the usage and exits 1.
#
case "$1" in
    startall)
        startall
        RETVAL=$?
        ;;

    stopall)
        stopall
        RETVAL=$?
        ;;

    restartall)
        restartall
        RETVAL=$?
        ;;

    repair)
        repair "$2"
        RETVAL=$?
        ;;

    condrestartall)
        # Restart only when the lock file says the service was started.
        # Not running is not an error for a conditional restart, so RETVAL
        # keeps its initial value in that case.
        if [ -f "$lockfile" ]
        then
            restartall
            RETVAL=$?
        fi
        ;;

    status)
        status "$2"
        RETVAL=$?
        ;;

    startuser)
        # Per-user actions need cgroups to be available: either the host
        # runs systemd or cgconfig reports that it is running.
        if is_systemd || service cgconfig status >/dev/null 2>&1
        then
            startuser "$2"
            RETVAL=$?
            # make the rules daemon re-read its rules
            pkill -USR2 cgrulesengd
        else
            RETVAL=1
        fi
        ;;

    stopuser)
        if is_systemd || service cgconfig status >/dev/null 2>&1
        then
            stopuser "$2"
            RETVAL=$?
            # make the rules daemon re-read its rules
            pkill -USR2 cgrulesengd
        else
            RETVAL=1
        fi
        ;;

    freezeuser)
        freeze_user "$2"
        RETVAL=$?
        ;;

    thawuser)
        thaw_user "$2"
        RETVAL=$?
        ;;

    *)
        echo "Usage: $0 {startall|stopall|restartall|repair [<username>]|condrestartall|status [<username>]|startuser <username>|stopuser <username>|freezeuser <username>|thawuser <username>}"
        exit 1
        ;;
esac

exit $RETVAL
