#!/usr/bin/env oo-ruby

#--
# Copyright 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++


require 'rubygems'
require 'openshift-origin-common'
require 'optparse'
require 'socket'
require 'json'
require 'etc'
require 'safe_yaml'
require 'facter'
require 'set'

# Command-line options shared by every check below.  The defaults are
# recorded up front so flags that are absent from ARGV keep these values.
$OPTIONS = {
  :verbose              => false,
  :timeout              => 60,
  :'run-upgrade-checks' => false
}

optparse = OptionParser.new do |parser|
  parser.banner = "\nUsage: #{$0}\n\nExample: #{$0} -v\n\n"

  parser.on('-v', '--verbose', 'Print verbose statements') do |flag|
    $OPTIONS[:verbose] = flag
  end

  # The value is parsed as a decimal integer by OptionParser.
  parser.on('-t n', '--timeout n', OptionParser::DecimalInteger, 'length of timeout') do |seconds|
    $OPTIONS[:timeout] = seconds
  end

  parser.on('--run-upgrade-checks', 'Perform only checks for upgrade consistency') do |flag|
    $OPTIONS[:'run-upgrade-checks'] = flag
  end
end
optparse.parse!

# Write an "INFO:" line to stdout, but only when --verbose was requested.
def verbose(msg)
  return unless $OPTIONS[:verbose]

  $stdout.write("INFO: #{msg}\n")
end

###### extension points - may be affected by loading of extension ######

# Extension point: name of the SELinux policy module that check_selinux
# looks for in the `semodule -l` listing.
def selinux_policy_name
  "openshift-origin"
end

# Extension point: additional package names that check_packages appends
# to the default package list.  The default is no extra packages.
def ext_packages
  Array.new
end

###### end extension points #####

# Load the optional extension file.  Per the "extension points" section
# above, loading it may affect those methods; when the file is absent the
# defaults defined above stay in effect.
begin
  require '/usr/libexec/openshift/lib/admin-accept-node-ext'
rescue LoadError => e
  verbose("using default accept-node extensions")
end

# When false, check_tc_config and the per-user tc class check are skipped.
$TC_CHECK=true

# Locations of the configuration files and directories the checks inspect.
$CONF_DIR="/etc/openshift"
$NODE_CONF_FILE=File.join($CONF_DIR, "/node.conf")
$RESOURCE_LIMITS_FILE=File.join($CONF_DIR,"/resource_limits.conf")
$LIMITS_CONF_DIR="/etc/security/limits.d"
#
# Control variables.  Override for testing
#

# The polyinstantiation boolean is expected under whichever of the two
# names appears in the `getsebool -a` listing.
poly_sebool = "allow_polyinstantiation:on"
if %x[/usr/sbin/getsebool -a] =~ /polyinstantiation_enabled/
  poly_sebool = "polyinstantiation_enabled:on"
end
# Space-separated "name:value" pairs consumed by check_selinux.
DEFAULT_SEBOOL_LIST="httpd_can_network_connect:on #{poly_sebool}"

# Space-separated package names queried with `rpm -q` by check_packages.
DEFAULT_PACKAGES="rubygem-openshift-origin-node openshift-origin-node-util \
    rubygem-openshift-origin-common selinux-policy selinux-policy-targeted"

# Service names get a "ruby193-" prefix when `rpm -q ruby193-mcollective`
# exits with status 0.
%x[/bin/rpm -q ruby193-mcollective &>/dev/null]
scl_prefix = $?.exitstatus == 0 ? "ruby193-" : ""

# Services whose `service <name> status` must succeed (check_services), and
# the SELinux context each listed service process is compared against
# (check_service_contexts).
DEFAULT_SERVICES="#{scl_prefix}mcollective cgconfig cgred httpd oddjobd"
DEFAULT_SERVICE_CONTEXTS="#{scl_prefix}mcollective='unconfined_u:system_r:openshift_initrc_t:s0-s0:c0.c1023' oddjobd='system_u:system_r:oddjob_t:s0-s0:c0.c1023'"
# Threshold used by check_semaphores for the kernel.sem value it reads.
DEFAULT_MINSEM=512
# Directory names that check_upgrades treats as V1 cartridge directories
# when found directly inside a gear home.
V1_DIRECTORIES=%w(ruby-1.8 ruby-1.9 nodejs-0.6
                  jbosseap-6.0 jbossews-1.0 jbossews-2.0 jbossas-7
                  php-5.3 python-2.6 python-2.7 python-3.3 10gen-mms-agent-0.1
                  metrics-0.1 jenkins-1.4 jenkins-client-1.4 perl-5.10 zend-5.6
                  mysql-5.1 postgresql-8.4 mongodb-2.2 rockmongo-1.1 phpmyadmin-4
                  diy-0.1 haproxy-1.4 cron-1.4)

# Each global starts out as the name of the environment variable associated
# with that control variable; validate_env may later overwrite it with the
# value the checks should use.
$SEBOOL_LIST="SEBOOL_LIST"
$PACKAGES="PACKAGES"
$SERVICES="SERVICES"
$SERVICE_CONTEXTS="SERVICE_CONTEXTS"
$MINSEM="MINSEM"


####### GLOBAL VARS ########
# Set to false by check_cgroup_config / check_tc_config when the system-wide
# check fails; check_users then skips the matching per-user check.
$CGROUP_PASS=true
$TC_PASS=true
###########################

# Write +msg+ followed by a newline to stderr.
def eputs(msg)
  $stderr.write(format("%s\n", msg))
end

######## UTILITIES ########

# Report a per-gear failure through do_fail, unless the gear appears to be
# in transition (being created, destroyed or modified) while this script
# is running.
#
# uuid - gear user name; a numeric uid is also accepted
# msg  - failure text handed to do_fail when the gear looks stable
#
def user_fail(uuid, msg)
  # A create/destroy hook lock touched after the snapshot means the gear
  # is changing under us; stay quiet.
  begin
    return if File.stat("/var/lock/oo-create.#{uuid}").mtime >= $START_TIME
  rescue Errno::ENOENT
  end

  snapshot_matches = $USERS.select { |u| (u.name == uuid) or (u.uid == uuid) }

  # Look the account up live, first by name ...
  passwd = begin
    Etc.getpwnam(uuid)
  rescue ArgumentError
    nil
  end

  # ... then by uid, in case we were fed a uid instead of a uuid.
  if passwd.nil?
    passwd = begin
      Etc.getpwuid(uuid.to_i)
    rescue ArgumentError, RangeError
      nil
    end
  end

  # Only accounts carrying the gear GECOS count as gears.
  if passwd and passwd.gecos != $CONF.get('GEAR_GECOS')
    passwd = nil
  end

  # The password entry appeared or disappeared during the run.
  return if snapshot_matches.empty? != passwd.nil?

  # The home directory was modified during the run.  A nil passwd raises
  # NoMethodError here, which is deliberately ignored.
  begin
    return if File.stat(passwd.dir).mtime >= $START_TIME
  rescue Errno::ENOENT, NoMethodError, ArgumentError
  end

  do_fail(msg)
end

# Print a "FAIL:" line on stderr and bump the global error counter that
# becomes the script's exit status.
def do_fail(msg)
  failure_line = "FAIL: " + msg
  eputs(failure_line)
  $STATUS = $STATUS + 1
end

###############################

# Load the node configuration and the resource limits file into the
# $CONF / $RESOURCE_LIMITS globals and derive $GEAR_BASE_DIR and
# $EXTERNAL_ETH_DEV from them.
#
# Every problem found is reported through do_fail:
# * a required node.conf key that is missing or empty
# * GEAR_BASE_DIR that is unset, missing, or not a directory
# * a missing resource limits file
def load_node_conf
  node_conf_path = OpenShift::Config::NODE_CONF_FILE
  verbose("loading node configuration file #{node_conf_path}")
  $CONF = OpenShift::Config.new node_conf_path

  # make sure required values are set in the conf file
  %w[
    GEAR_BASE_DIR GEAR_SKEL_DIR GEAR_SHELL GEAR_GECOS
    GEAR_MIN_UID GEAR_MAX_UID CARTRIDGE_BASE_PATH
    PROXY_MIN_PORT_NUM PROXY_PORTS_PER_GEAR OPENSHIFT_HTTP_CONF_DIR
    CLOUD_DOMAIN BROKER_HOST PUBLIC_IP PUBLIC_HOSTNAME
  ].each do |key|
    setting = $CONF.get(key)
    next unless setting.nil? || setting.empty?
    do_fail("SEVERE: in #{node_conf_path}, #{key} not defined")
  end

  $GEAR_BASE_DIR = $CONF.get('GEAR_BASE_DIR')
  # The message promises a directory test, so test for a directory; the nil
  # guard keeps an unset value from raising TypeError.
  unless !$GEAR_BASE_DIR.nil? && File.directory?($GEAR_BASE_DIR)
    do_fail("GEAR_BASE_DIR does not exist or is not a directory: #{$GEAR_BASE_DIR}")
  end

  $EXTERNAL_ETH_DEV = ($CONF.get('EXTERNAL_ETH_DEV') or 'eth0')

  verbose("loading resource limit file #{$RESOURCE_LIMITS_FILE}")
  # File.exist? is used because File.exists? is no longer available in
  # current Ruby releases.
  do_fail("No resource limits file: #{$RESOURCE_LIMITS_FILE}") unless File.exist? $RESOURCE_LIMITS_FILE
  $RESOURCE_LIMITS = OpenShift::Config.new $RESOURCE_LIMITS_FILE
end


# Confirm that $EXTERNAL_ETH_DEV names a link known to `ip link show`.
# A failing command or a missing /sbin/ip binary is reported via do_fail.
def find_ext_net_dev
  verbose("finding external network device")

  begin
    %x[/sbin/ip link show dev #{$EXTERNAL_ETH_DEV}]
    link_known = $?.success?
    do_fail("SEVERE: not a valid ethernet device: #{$EXTERNAL_ETH_DEV}") unless link_known
  rescue Errno::ENOENT
    do_fail("SEVERE: could not find ip command (/sbin/ip).")
  end
end

# Snapshot the system state that the per-user checks compare against:
#
#   $START_TIME  - when the snapshot was taken (used by user_fail)
#   $TC_DATA     - `tc class show` lines containing "parent"
#   $ALL_QUOTAS  - lines of `repquota -a`
#   $CGROUP_DATA - gear name => Set of cgroup subsystems
#   $USERS       - passwd entries whose GECOS equals GEAR_GECOS
#
# Everything is collected back to back to narrow the window in which these
# can reflect the system in different states.
def load_users
  $START_TIME = Time.now
  # Spelled "class" in full, matching check_tc_config.
  $TC_DATA = %x[/sbin/tc class show dev #{$EXTERNAL_ETH_DEV}].split("\n").grep(/parent/)
  $ALL_QUOTAS = %x[/usr/sbin/repquota -a].split("\n")

  # Aggregate per-user hash of lscgroup data. In some cases, multiple subsystems
  # for a user are displayed on a single line. Other times, they are split.
  # This handles both cases using Set merging.
  $CGROUP_DATA = Hash.new { |hash, key| hash[key] = Set.new }
  %x["/bin/lscgroup"].split.each do |entry|
    subsystems, cgroup_path = entry.split(':')
    # A token without a ':' separator carries no cgroup path; skip it
    # instead of failing on nil.
    next if cgroup_path.nil?

    gear = %r{/openshift/(.+)}.match(cgroup_path)
    next if gear.nil?

    $CGROUP_DATA[gear[1]].merge(subsystems.split(','))
  end

  gear_gecos = $CONF.get('GEAR_GECOS')
  $USERS = []
  Etc.passwd { |u| $USERS << u if u.gecos == gear_gecos }
end

# Resolve the control variables ($SEBOOL_LIST, $PACKAGES, $SERVICES,
# $SERVICE_CONTEXTS, $MINSEM).
#
# For each one, the environment variable of the same name wins when it is
# set (a warning is printed on stderr); otherwise the DEFAULT_* constant is
# used.  $MINSEM is always stored as an Integer because check_semaphores
# compares it numerically.
def validate_env
  # Returns the effective value for one control variable.
  pick = lambda do |env_name, default_value|
    override = ENV[env_name]
    if override.nil?
      default_value
    else
      eputs "WARNING: ENV overrides #{env_name}"
      # Use the override itself; leaving the global untouched would keep
      # the variable's NAME as its value.
      override
    end
  end

  $SEBOOL_LIST      = pick.call('SEBOOL_LIST', DEFAULT_SEBOOL_LIST)
  $PACKAGES         = pick.call('PACKAGES', DEFAULT_PACKAGES)
  $SERVICES         = pick.call('SERVICES', DEFAULT_SERVICES)
  $SERVICE_CONTEXTS = pick.call('SERVICE_CONTEXTS', DEFAULT_SERVICE_CONTEXTS)
  $MINSEM           = pick.call('MINSEM', DEFAULT_MINSEM).to_i
end

# Check name resolution for the node's public settings in node.conf:
#
# * BROKER_HOST must resolve
# * PUBLIC_HOSTNAME must resolve as a FQDN, and not to localhost
# * PUBLIC_IP must not be a localhost address
# * PUBLIC_HOSTNAME must resolve to PUBLIC_IP or to a global-scope IPv4
#   address configured on this host
#
# Problems are reported through do_fail.  Only StandardError is rescued
# around the lookups, so Interrupt and SystemExit are not swallowed.
def check_node_public_resolution
  verbose("checking node public hostname resolution")

  localhost = %w[127.0.0.1 ::1]
  pubhost = $CONF.get("PUBLIC_HOSTNAME")
  pubip = $CONF.get("PUBLIC_IP")
  broker = $CONF.get("BROKER_HOST")

  # check that the broker host resolves
  begin
    # on devenv or livecd, BROKER_HOST=localhost is considered legit, so
    # the resolved address itself is not inspected.
    IPSocket.getaddress(broker)
  rescue StandardError => e
    do_fail("#{$NODE_CONF_FILE}: BROKER_HOST #{broker} does not resolve (#{e})")
  end

  # attempt to resolve the node public hostname
  begin
    # must resolve as a FQDN, so should be the full name
    # (the "." at the end blocks adding a search domain)
    resolved_host = IPSocket.getaddress(pubhost + ".")
  rescue StandardError => e
    do_fail("#{$NODE_CONF_FILE}: PUBLIC_HOSTNAME #{pubhost} does not resolve as a FQDN (#{e})")
    return
  end

  # make sure public settings resolve correctly to a non-localhost IP
  do_fail("#{$NODE_CONF_FILE}: PUBLIC_HOSTNAME #{pubhost} should be public, not localhost") if localhost.member? resolved_host
  do_fail("#{$NODE_CONF_FILE}: PUBLIC_IP #{pubip} should be public, not localhost") if localhost.member? pubip

  # it would be nice to check that the PUBLIC_IP actually ends up at a NIC on this host.
  # however in settings like EC2, there's no good way to do that.
  #
  # but we can check PUBLIC_HOSTNAME resolves to either a NIC on this host or PUBLIC_IP.
  # in EC2, hostname resolves differently internal vs. external, so will match NIC.
  # in most places, PUBLIC_HOSTNAME will resolve to PUBLIC_IP
  my_ips = `ip addr show scope global`.scan(/^\s+inet\s+([\d\.]+)/).flatten.push(pubip).uniq
  do_fail("#{$NODE_CONF_FILE}: PUBLIC_HOSTNAME #{pubhost} resolves to #{resolved_host}; expected #{my_ips.join '|'}") unless my_ips.member? resolved_host
end

# Validate the SELinux setup of the node:
#
# * SELinux is enforcing and the policy named by selinux_policy_name is
#   listed by `semodule -l`
# * every "name:value" pair in $SEBOOL_LIST matches `getsebool`
# * `restorecon -n -v` reports nothing to relabel for $GEAR_BASE_DIR
# * gear directories carry an MCS label on app-root (sampled, see below)
#
# Calls check_selinux_additional afterwards when an extension defines it.
def check_selinux()
  verbose("checking selinux status")

  policy_name = selinux_policy_name

  if %x[/usr/sbin/getenforce] =~ /Enforcing/
    verbose("checking selinux #{policy_name} policy")
    # semodule is run under timeout(1), which kills it with SIGKILL after
    # --timeout seconds.
    loaded_modules = %x[/usr/bin/timeout -s9 #{$OPTIONS[:timeout]} /usr/sbin/semodule -l]
    unless loaded_modules =~ /#{policy_name}[\d\.\W]+$/
      do_fail("selinux #{policy_name} policy is not loaded")
    end
  else
    do_fail("selinux is not enabled")
  end

  verbose("checking selinux booleans")
  $SEBOOL_LIST.split.each do |bool|
    name, value = bool.split(':')
    result = %x[/usr/sbin/getsebool #{name}]
    do_fail("selinux boolean #{name} should be #{value}") unless result =~ /#{name} --> #{value}/
  end

  # `restorecon -n -v` changes nothing and prints what it would relabel, so
  # any output means a label is wrong.  (Matching the output against an
  # empty regexp always succeeds and can never detect that.)
  pending_relabels = %x[/sbin/restorecon -n -v #{$GEAR_BASE_DIR}]
  unless pending_relabels.strip.empty?
    do_fail("invalid selinux labels in OPENSHIFT_DIR #{$GEAR_BASE_DIR}")
  end

  # This will likely only detect the effects of a full relabel but it's
  # better than nothing.  My unscientific tests shows that checking 100 gears
  # only added about 1.5 seconds execution time.
  gear_dirs = Dir.glob(File.join($GEAR_BASE_DIR, "*")).select do |d|
    File.directory?(d) and not (File.symlink?(d) or (File.stat(d).uid == 0))
  end

  # Only the first 99 candidates are sampled, and the scan stops at the
  # first gear without an MCS label.
  gear_dirs[0,99].each do |dir|
    out = %x[/bin/ls -d --scontext #{dir}/app-root 2>/dev/null]
    mcs = /.+:.+:.+:.+:c\d+,c\d+? /.match(out)
    if mcs.nil?
      user = File.basename(dir)
      user_fail(user, "invalid MCS labels on #{dir}/app-root. run oo-restorecon to restore OpenShift SELinux categories")
      break
    end
  end

  if defined?(check_selinux_additional)
    check_selinux_additional
  end
end

# Query rpm for every package in $PACKAGES plus ext_packages and report
# each one that rpm says is missing.
def check_packages
  verbose("checking package list")
  packages = "#{$PACKAGES}  #{ext_packages.join(' ')}"
  # Match rpm's full "is not installed" phrase; a bare /not/ also hits
  # installed packages whose name contains "not" (e.g. inotify-tools).
  missing = %x[/bin/rpm -q #{packages}].split("\n").grep(/is not installed/)
  missing.each { |report_line| do_fail(report_line) }
end

# Ask the init system for the status of every service named in $SERVICES
# and report each one whose status command exits non-zero.
def check_services
  verbose("checking services")
  $SERVICES.split.each do |service|
    %x[/sbin/service #{service} status &>/dev/null]
    next if $?.exitstatus == 0

    do_fail("service #{service} not running")
  end
end

# Compare the SELinux context of each running service against the
# expectation listed in $SERVICE_CONTEXTS.
#
# $SERVICE_CONTEXTS is a sequence of  name='user:role:type:label'  items.
# Role, type and label are compared; the SELinux user is not.  The pid is
# taken from the "pid NNN" text in `service <name> status`, and the live
# context is read from /proc/<pid>/attr/current.
def check_service_contexts
  expectation = /([-\w]+)\=[\'\"]?(\w+):(\w+):(\w+):([\w\-\:\.\,]+)[\'\"]?/

  $SERVICE_CONTEXTS.scan(expectation) do |service, _seuser, want_role, want_type, want_label|
    status_text = %x[/sbin/service #{service} status 2>&1]
    begin
      pid = /pid\s+(\d+)/.match(status_text)[1]
      live_context = File.read("/proc/#{pid}/attr/current").strip
      live = /(\w+):(\w+):(\w+):([\w\-\:\.\,]+)/.match(live_context)
      as_expected = (want_role == live[2]) && (want_type == live[3]) && (want_label == live[4])
      do_fail("SELinux context for #{service} is incorrect") unless as_expected
    rescue
      # No pid in the status text, unreadable /proc entry, or an unparsable
      # context all end up here.
      do_fail("Could not get SELinux context for #{service}")
    end
  end

  nil
end

# Verify that the kernel.sem value taken from the fourth tab-separated
# field of the `sysctl kernel.sem` output is at least $MINSEM.
def check_semaphores
  verbose("checking kernel semaphores >= #{$MINSEM}")
  semcount = %x[/sbin/sysctl kernel.sem | /bin/cut -f4].strip.to_i
  # A value equal to the minimum is acceptable, as both the ">=" in the
  # verbose line and the "<" in the failure text state.
  do_fail("kernel.sem semaphores too low: #{semcount} < #{$MINSEM}") if semcount < $MINSEM
end

#
# Check cgroup config
#
# Verify that lscgroup lists the root cgroup for the cpu, cpuacct, memory,
# freezer and net_cls subsystems.  On failure $CGROUP_PASS is cleared so
# that the per-user cgroup checks are skipped.
def check_cgroup_config
  verbose("checking cgroups configuration")

  root_group_lines = %x[/bin/lscgroup cpu,cpuacct,memory,freezer,net_cls:/ 2>/dev/null | /usr/bin/wc -l].strip.to_i
  return unless root_group_lines < 1

  do_fail("The root cgroup does not exist (cgroups not enabled)")
  $CGROUP_PASS = false
end


#
# Check gear processes belong to cgroups
#
# Verify that every process owned by a gear uid is listed in that gear's
# cgroup.procs file for each mounted controller.
#
# The gear uid range comes from GEAR_MIN_UID/GEAR_MAX_UID in node.conf and
# is replaced by the district_first_uid/district_max_uid facts when the
# node reports a district.  A missing district_uuid fact is treated like
# "None" rather than aborting the script.
def check_cgroup_procs
  verbose("checking cgroups processes")

  ### Gather current procs running ###
  min_uid = $CONF.get('GEAR_MIN_UID').to_i
  max_uid = $CONF.get('GEAR_MAX_UID').to_i

  district_fact = Facter['district_uuid']
  district_value = district_fact.nil? ? nil : district_fact.value
  district_uuid = district_value.nil? ? 'None' : district_value.to_s
  verbose("find district uuid: #{district_uuid}")

  if district_uuid.casecmp("None") != 0
    first_uid_fact = Facter['district_first_uid']
    max_uid_fact = Facter['district_max_uid']
    min_uid = first_uid_fact.value.to_i unless first_uid_fact.nil?
    max_uid = max_uid_fact.value.to_i unless max_uid_fact.nil?
  end

  verbose("determining node uid range: #{min_uid} to #{max_uid}")

  # uid => gear name; the first passwd entry wins for a duplicated uid.
  gear_name_by_uid = {}
  $USERS.each { |u| gear_name_by_uid[u.uid] = u.name unless gear_name_by_uid.key?(u.uid) }

  ps_procs = Hash.new { |hash, key| hash[key] = Array.new }
  %x[/bin/ps -e -o uid,pid].split("\n").each do |line|
    uid, pid = line.split[0,2]
    # The header line yields uid 0 here; it is only skipped if 0 lies
    # outside the configured range.
    uid = uid.to_i
    next unless uid.between?(min_uid, max_uid)

    uname = gear_name_by_uid[uid]
    if uname.nil?
      do_fail("Process #{pid} is owned by a gear that's no longer on the system, uid: #{uid}")
      next
    end

    ps_procs[uname] << pid unless ps_procs[uname].include?(pid)
  end

  ### Gather cgroup procs ###
  cgroup_procs = Hash.new { |hash, key| hash[key] = Hash.new { |h, k| h[k] = Array.new } }

  # Support mounting cgroup controllers under /cgroup/all or
  # /cgroup/<controller>
  Dir.glob("/cgroup/*/openshift/*/cgroup.procs").each do |file|
    parts = file.split("/")
    controller = parts[2]
    uuid = parts[4]
    begin
      cgroup_procs[controller][uuid] = File.readlines(file).map { |line| line.strip }
    rescue Errno::ENOENT
      # The cgroup went away between the glob and the read; user_fail
      # screens the resulting mismatch for gears in transition.
      next
    end
  end

  ### Compare ###
  ps_procs.each do |uuid, procs|
    cgroup_procs.each do |controller, controller_procs|
      missing = procs - controller_procs[uuid]
      missing.each do |pid|
        # ensure the process is still running and not defunct before failing
        # this fixes both the transient process and the defunct process
        # detection problems
        begin
          if File.read("/proc/#{pid}/status") !~ /^State:\s+Z/
            user_fail(uuid, "#{uuid} has a process missing from cgroups: #{pid} cgroups controller: #{controller}")
          end
        rescue Errno::ENOENT
        end
      end
    end
  end
end

#
# Check tc config
#
# Verify the traffic-control setup on $EXTERNAL_ETH_DEV in three steps,
# stopping at the first one that fails: the root htb qdisc, at least one
# cgroup filter, and at least one htb class.  A failure is reported and
# clears $TC_PASS.
def check_tc_config
  # Records the failure and marks tc as unusable for the per-user checks.
  flunk = lambda do |reason|
    do_fail(reason)
    $TC_PASS = false
  end

  verbose("checking presence of tc qdisc")
  expected_qdisc = "qdisc htb 1: root"
  %x[/sbin/tc qdisc show dev #{$EXTERNAL_ETH_DEV} | /bin/grep -q "#{expected_qdisc}"]
  return flunk.call("tc htb qdisc not configured") if $?.exitstatus != 0

  verbose("checking for cgroup filter")
  filter_count = %x[/sbin/tc filter show dev #{$EXTERNAL_ETH_DEV} | /bin/grep 'cgroup handle' | /usr/bin/uniq | /usr/bin/wc -l].strip.to_i
  return flunk.call("no cgroup filter configured") if filter_count < 1

  verbose("checking presence of tc classes")
  class_count = %x[/sbin/tc class show dev #{$EXTERNAL_ETH_DEV} | /bin/grep 'class htb' | /usr/bin/wc -l].strip.to_i
  flunk.call("no htb classes configured") if class_count < 1
end

# Verify that user quotas are switched on for the filesystem holding
# $GEAR_BASE_DIR and that its aquota.user file exists with an SELinux type
# containing quota_db_t.
def check_quotas
  verbose("checking filesystem quotas")

  # One df call provides both values: in `df -P` output the device is the
  # first column and the mount point the sixth.
  df_fields = %x[/bin/df -P #{$GEAR_BASE_DIR} | /usr/bin/tail -1].split
  oo_device = df_fields[0]
  oo_mount = df_fields[5]
  if oo_mount.nil?
    # Without a mount point none of the checks below can be carried out.
    do_fail("could not determine the filesystem holding #{$GEAR_BASE_DIR}")
    return
  end

  unless %x[/sbin/quotaon -u -p #{oo_mount} 2>&1].strip == "user quota on #{oo_mount} (#{oo_device}) is on"
    do_fail "quotas are not enabled on #{oo_mount} (#{oo_device})"
  end

  quota_db_file = File.join(oo_mount, "/aquota.user")
  # File.exist? is used because File.exists? is no longer available in
  # current Ruby releases.
  if File.exist? quota_db_file
    verbose("checking quota db file selinux label")
    quota_db_type = %x[/usr/bin/secon -f #{quota_db_file} | /bin/grep type: ]
    if quota_db_type !~ /quota_db_t/
      do_fail("quota db file: selinux type is incorrect: #{quota_db_type}")
    end
  else
    do_fail("quota db file #{quota_db_file} does not exist")
  end
end

# Run the per-gear checks for every entry in $USERS: home directory, PAM
# limits file, cgroups (when $CGROUP_PASS), exactly one tc class (when
# $TC_CHECK and $TC_PASS) and non-zero quota limits.  Problems go through
# user_fail so gears in transition are not reported.
def check_users
  verbose("checking #{$USERS.length} user accounts")
  limits_file_templ = "#{$LIMITS_CONF_DIR}/84-%s.conf"

  $USERS.each do |user|
    name = user.name

    # File.exist? is used because File.exists? is no longer available in
    # current Ruby releases.
    user_fail(name, "user #{name} does not have a home directory #{user.dir}") unless File.exist? user.dir
    user_fail(name, "user #{name} does not have a PAM limits file") unless File.exist?(limits_file_templ % name)

    check_user_cgroups name if $CGROUP_PASS

    if $TC_CHECK && $TC_PASS
      hex_uid = user.uid.to_i.to_s(16)
      # Word boundaries keep class 1:1f4 from also matching 1:1f40.
      tc_results = $TC_DATA.grep(/\b1:#{hex_uid}\b/)
      user_fail(name, "user #{name} must have 1 tc class entry: actual=#{tc_results.length}") if tc_results.length != 1
    end

    # Columns 4 and 7 of the repquota line are read as the block and file
    # hard limits; a zero in either is treated as "no quota imposed".
    quota_lines = $ALL_QUOTAS.grep(/#{Regexp.escape(name)}/)
    quota_fields = quota_lines.empty? ? [] : quota_lines[0].split
    if quota_fields[4].to_i == 0 || quota_fields[7].to_i == 0
      user_fail(name, "user #{name} does not have quotas imposed")
    end
  end
end

# The gear must appear in exactly these cgroup subsystems, no more and no
# fewer, according to the lscgroup snapshot held in $CGROUP_DATA.
def check_user_cgroups(username)
  required_subsystems = Set.new(%w[cpu cpuacct memory freezer net_cls])
  return if required_subsystems == $CGROUP_DATA[username]

  user_fail(username, "user #{username} must have all cgroup subsystems.")
end

# Cross-check the front-end httpd configuration in OPENSHIFT_HTTP_CONF_DIR
# against the gears present on the node.
#
# The check only runs when OPENSHIFT_FRONTEND_HTTP_PLUGINS lists both the
# apache-mod-rewrite and the nodejs-websocket plugins.  It then verifies:
#
# * the *.db map files can be opened
# * the *.txt maps and *.json files can be read
# * every gear has an OPENSHIFT_GEAR_DNS variable
# * gears with a web framework / websocket cartridge have a matching
#   nodes.txt / routes.json entry
# * no map entry, geardb entry or per-gear *.conf file is left over that
#   does not belong to a gear
def check_system_httpd_configs
  verbose("checking system httpd configs")

  plugin_setting = $CONF.get('OPENSHIFT_FRONTEND_HTTP_PLUGINS')
  if plugin_setting.nil?
    do_fail("No frontend plugins configured in OPENSHIFT_FRONTEND_HTTP_PLUGINS")
    return
  end

  plugins = plugin_setting.split(',')
  return unless plugins.include?('openshift-origin-frontend-apache-mod-rewrite')
  return unless plugins.include?('openshift-origin-frontend-nodejs-websocket')

  http_conf_dir = $CONF.get('OPENSHIFT_HTTP_CONF_DIR')

  # Only readability is tested here; the handle is closed right away so
  # it is not left open for the rest of the run.
  ['aliases.db', 'idler.db', 'nodes.db', 'sts.db'].each do |db|
    begin
      File.open(File.join(http_conf_dir, db)).close
    rescue => e
      do_fail("#{e}")
    end
  end

  # file name => parsed content.  The .txt maps hold "key value" lines.
  httpconfs = Hash.new
  ['aliases.txt', 'idler.txt', 'nodes.txt', 'sts.txt'].each do |db|
    httpconfs[db] = Hash.new
    begin
      File.open(File.join(http_conf_dir, db)) do |f|
        f.each do |l|
          path, dest = l.strip.split
          if (not path.nil?) and (not dest.nil?)
            httpconfs[db][path] = dest
          end
        end
      end
    rescue => e
      do_fail("#{e}")
    end
  end

  ['routes.json', 'geardb.json'].each do |db|
    begin
      File.open(File.join(http_conf_dir, db)) do |f|
        httpconfs[db] = JSON.parse(f.read)
      end
    rescue => e
      httpconfs[db] = Hash.new
      do_fail("#{e}")
    end
  end

  httpconfs['nodes.txt'].delete_if { |k, v| k.split('/')[0] == '__default__' }

  # Gears without a DNS variable; their geardb entries are not reported
  # a second time further down.
  mangled_gears = []
  $USERS.each do |u|
    dnsfile = File.join($GEAR_BASE_DIR, u.name, '.env', 'OPENSHIFT_GEAR_DNS')
    if not File.exist?(dnsfile)
      user_fail(u.name, "Gear does not have an OPENSHIFT_GEAR_DNS variable: '#{u.name}'")
      mangled_gears << u.name
      next
    end

    # The variable file is either "export NAME='value'" or the bare value.
    fqdn = File.read(dnsfile)
    if fqdn.start_with?('export ')
      fqdn = fqdn.sub(/^.*=/,'').gsub('\'','').gsub('"','').strip.downcase
    else
      fqdn.chomp!
      fqdn.downcase!
    end

    begin
      SafeYAML::OPTIONS[:default_mode] = :safe
      has_websocket = false
      has_framework = false

      # Walk the cartridge directories in the gear home that correspond to
      # a cartridge installed under CARTRIDGE_BASE_PATH.
      cartridge_base = $CONF.get('CARTRIDGE_BASE_PATH')
      Dir.glob(File.join(u.dir, '*')).each do |gear_entry|
        next if File.symlink?(gear_entry)

        cart_path = File.join(cartridge_base, File.basename(gear_entry))
        next unless File.exist?(cart_path)

        ['metadata', 'info'].each do |mp|
          mfile = File.join(cart_path, mp, 'manifest.yml')
          next unless File.exist?(mfile)
          begin
            manifest = YAML.load(File.read(mfile), :safe => true)

            manifest["Categories"] ||= []
            manifest["Endpoints"]  ||= []

            if manifest["Categories"].include?('web_framework')
              manifest["Endpoints"].each do |endpoint|
                endpoint["Protocols"] ||= ["http"]

                if endpoint["Mappings"]
                  endpoint["Mappings"].each do |mapping|

                    mapping["Options"] ||= {}

                    if mapping["Options"]["websocket"]
                      endpoint["Protocols"] << "ws"
                    end

                    has_framework = true if endpoint["Protocols"].include?("http")
                    has_websocket = true if endpoint["Protocols"].include?("ws")
                  end
                end
              end
            end

          rescue
            # Best effort: an unreadable manifest is skipped.
          end
        end
      end

      if has_framework and not httpconfs['nodes.txt'].has_key?(fqdn)
        user_fail(u.name, "Gear has a web framework cartridge but no Apache configuration: #{u.name}")
      end

      if has_websocket and not httpconfs['routes.json'].has_key?(fqdn)
        user_fail(u.name, "Gear has a websocket framework cartridge but no websocket configuration: #{u.name}")
      end

      # Everything that belongs to this gear is accounted for; whatever is
      # left in httpconfs after the loop is an orphan.
      httpconfs['nodes.txt'].delete_if   { |k, v| k.split('/')[0] == fqdn }
      httpconfs['aliases.txt'].delete_if { |k, v| v == fqdn }
      httpconfs['idler.txt'].delete(fqdn)
      httpconfs['sts.txt'].delete(fqdn)
      httpconfs['routes.json'].delete(fqdn)
      httpconfs['routes.json'].delete_if { |k, v| v["alias"] == fqdn }
      httpconfs['geardb.json'].delete_if { |k, v| v["fqdn"] == fqdn }
    rescue
      # Best effort: a gear that cannot be inspected is left alone.
    end
  end

  fail_dns_names = Hash.new
  httpconfs['nodes.txt'].each   { |k, v| fail_dns_names[k.split('/')[0]] = 1 }
  httpconfs['aliases.txt'].each { |k, v| fail_dns_names[v] = 1 }
  httpconfs['idler.txt'].each   { |k, v| fail_dns_names[k] = 1 }
  httpconfs['sts.txt'].each     { |k, v| fail_dns_names[k] = 1 }
  httpconfs['routes.json'].each do |k, v|
    if v["alias"]
      fail_dns_names[v["alias"]] = 1
    else
      fail_dns_names[k] = 1
    end
  end

  httpconfs['geardb.json'].each do |k, v|
    if not mangled_gears.include?(k)
      user_fail(k, "httpd config references UUID without associated gear: '#{k}'")
    end
    # Already reported by UUID above; do not report the DNS name as well.
    fail_dns_names.delete(v["fqdn"])
  end

  fail_dns_names.each do |k, v|
    do_fail("httpd config references DNS name without associated gear: '#{k}'")
  end

  Dir.foreach(http_conf_dir) do |cfile|
    next if cfile[0,1] == "."
    next if cfile[-5,5] != ".conf"

    # The owning gear is the part before the first underscore.  A name
    # without an underscore is compared as a whole (and so reported as
    # orphaned) instead of raising on a nil index.
    underscore = cfile.index('_')
    owner = underscore.nil? ? cfile : cfile[0..underscore-1]
    if $USERS.select { |u| u.name == owner }.empty?
      do_fail("httpd config file #{cfile} doesn't have an associated user")
    end
  end
end

# Inspect every real directory directly under $GEAR_BASE_DIR (dot entries,
# lost+found, last_access.log and symlinks are ignored).  Each one must
# contain the standard dot directories and at least one cartridge manifest,
# and must belong to a gear user.  Problems go through user_fail.
def check_app_dirs
  verbose("checking application dirs")
  unclaimed_users = $USERS.dup
  required_dotdirs = [ ".ssh", ".env", ".sandbox", ".tmp" ]
  ignored_entries = ['lost+found', 'last_access.log']

  Dir.foreach($GEAR_BASE_DIR) do |entry|
    next if entry[0,1] == "." || ignored_entries.include?(entry)

    gear_home = File.join($GEAR_BASE_DIR, entry)
    next if File.symlink?(gear_home) || !File.directory?(gear_home)

    required_dotdirs.each do |dotdir|
      next if File.directory?(File.join(gear_home, dotdir))
      user_fail(entry, "directory #{entry} doesn't have a #{dotdir} directory")
    end

    manifests = Dir.glob(File.join(gear_home, '*', 'metadata', 'manifest.yml'))
    user_fail(entry, "directory #{entry} doesn't have a cartridge directory") if manifests.empty?

    # reject! answers nil when nothing was removed, i.e. when no gear user
    # carries this directory's name.
    if unclaimed_users.reject! { |u| u.name == entry }.nil?
      user_fail(entry, "directory #{entry} doesn't have an associated user")
    end
  end
end

# Upgrade consistency check (--run-upgrade-checks): report every gear home
# under $GEAR_BASE_DIR that still contains
#
# * a directory named like a V1 cartridge (see V1_DIRECTORIES)
# * upgrade marker files   (app-root/runtime/.upgrade*)
# * pre-upgrade state      (app-root/runtime/.preupgrade_state)
#
# Dot entries, lost+found, last_access.log, symlinks and non-directories
# are ignored.  Problems go through user_fail.
def check_upgrades
  verbose("running upgrade checks")

  Dir.foreach($GEAR_BASE_DIR) do |entry|
    next if entry[0,1] == "."
    next if entry == 'lost+found'
    next if entry == 'last_access.log'

    gear_home = File.join($GEAR_BASE_DIR, entry)

    next if File.symlink? gear_home
    next unless File.directory? gear_home

    # A V1 cartridge directory is a real (non-symlink) directory whose name
    # is one of V1_DIRECTORIES; the search stops at the first hit.
    v1_cartridge_dirs_found = Dir.entries(gear_home).any? do |gear_entry|
      next false if gear_entry[0,1] == "."
      next false unless V1_DIRECTORIES.include?(gear_entry)

      gear_entry_full = File.join(gear_home, gear_entry)
      File.directory?(gear_entry_full) && !File.symlink?(gear_entry_full)
    end

    runtime_dir = File.join(gear_home, 'app-root', 'runtime')
    upgrade_markers_found = !Dir.glob(File.join(runtime_dir, '.upgrade*')).empty?
    # File.exist? is used because File.exists? is no longer available in
    # current Ruby releases.
    preupgrade_state_found = File.exist?(File.join(runtime_dir, '.preupgrade_state'))

    if v1_cartridge_dirs_found
      user_fail(entry, "directory #{entry} contains a V1 cartridge directory")
    end

    if upgrade_markers_found
      user_fail(entry, "directory #{entry} contains upgrade data")
    end

    if preupgrade_state_found
      user_fail(entry, "directory #{entry} contains pre-upgrade state")
    end
  end
end

# Verify that every cartridge manifest under CARTRIDGE_BASE_PATH has a
# counterpart (same Name and Cartridge-Version) in the gear base dir's
# .cartridge_repository, and that the counterpart's ctime is not older
# than the source manifest's.  Unparsable or unusable manifests are
# reported as well.
def check_cartridge_repository
  verbose("checking cartridge repository")

  SafeYAML::OPTIONS[:default_mode] = :safe

  repo_pattern = "#{$CONF.get('GEAR_BASE_DIR')}/.cartridge_repository/*/*/metadata/manifest.yml"
  repo_ctimes = {}
  repo_manifests = {}
  Dir.glob(repo_pattern).each do |repo_file|
    begin
      repo_ctimes[repo_file] = File.stat(repo_file).ctime
      parsed = YAML.safe_load_file(repo_file)
      repo_manifests[repo_file] = parsed
      # Probe the two keys used for matching; a manifest that does not
      # support key lookup raises here and is dropped below.
      parsed['Name'] || parsed['Cartridge-Version']
    rescue Psych::SyntaxError
      do_fail("failed to parse manifest file: #{repo_file}")
    rescue => e
      do_fail("error with manifest file: #{repo_file} #{e}")
      verbose(e.backtrace.join("\n"))
      repo_manifests.delete(repo_file)
    end
  end

  source_pattern = "#{$CONF.get('CARTRIDGE_BASE_PATH')}/*/metadata/manifest.yml"
  Dir.glob(source_pattern).each do |source_file|
    begin
      source_manifest = YAML.safe_load_file(source_file)
      source_ctime = File.stat(source_file).ctime

      # First repository manifest with the same name and version, if any.
      counterpart = repo_manifests.find do |_repo_file, repo_manifest|
        (source_manifest['Name'] == repo_manifest['Name']) and
          (source_manifest['Cartridge-Version'] == repo_manifest['Cartridge-Version'])
      end
      repo_ctime = counterpart.nil? ? nil : repo_ctimes[counterpart[0]]

      if repo_ctime.nil?
        do_fail("no manifest in the cart repo matches #{source_file}")
      elsif repo_ctime < source_ctime
        do_fail("cart repo version is older than #{source_file}")
      end

    rescue Psych::SyntaxError
      do_fail("failed to parse manifest file: #{source_file}")
    rescue => e
      do_fail("error with manifest file: #{source_file} #{e}")
      verbose(e.backtrace.join("\n"))
    end

  end
end

# Script entry point.  Loads the configuration and the user snapshot, runs
# either the upgrade checks alone (--run-upgrade-checks) or the full set of
# node checks, prints PASS or the number of errors, and exits with the
# error count.
if __FILE__ == $0
  $STATUS = 0
  load_node_conf
  find_ext_net_dev
  load_users

  if $OPTIONS[:'run-upgrade-checks']
    check_upgrades
  else
    validate_env
    check_node_public_resolution
    check_selinux
    check_packages
    check_services
    check_service_contexts
    check_semaphores
    check_cgroup_config
    check_cgroup_procs
    check_tc_config if $TC_CHECK
    check_quotas
    check_users
    check_app_dirs
    check_system_httpd_configs
    check_cartridge_repository
  end

  if $STATUS == 0
    puts "PASS"
  else
    eputs "#{$STATUS} ERRORS"
  end

  # Exit statuses are reported modulo 256, so an uncapped count of 256
  # (or any multiple) would look like success to the caller.
  exit([$STATUS, 255].min)
end
