#!/usr/bin/env oo-ruby
# Run with "-h" for usage, "-v" to see INFO-level output while running.
#
# Run this script as root on any kind of OpenShift host to diagnose common problems.
# It is intended to evolve quickly in response to actual problems experienced.
# It does not perform any function or send results anywhere; it just outputs
# diagnostic information which may be of some use in troubleshooting.
#
# INFO output is strictly informational.
# WARN output means something is not right but may not impair functionality.
# FAIL output means a serious problem probably impairing functionality.
#
# OpenShift Online admins may need to adjust PATH to use proper ruby and gem for broker tests.
#
# Please report false positives/negatives or other problems with this script
# via https://bugzilla.redhat.com/

#--
# Copyright 2013 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

class OODiag

  require 'rubygems'
  require 'openshift-origin-common'
  require 'tempfile'
  require 'bundler'

  # Set up (or reset) the diagnostic state.
  #
  # options - optional Hash of settings (:wait, :verbose, ...). When omitted,
  #           options from a previous call are kept; failing that, the
  #           built-in defaults are used.
  #
  # Counters, host-role flags and the cached RPM / project / OS lookups are
  # always reset, which is why run_setup can call this again.
  def initialize(options = nil)
    defaults = { :wait => 2, :verbose => false }
    @options = options || @options || defaults

    # result counters
    @errors = @warns = 0

    # host roles, filled in by detect_system_properties
    @is_broker = @is_node = false

    # lookup tables, filled in during setup
    @rpms = {}
    @project_is = {}
    @os_is = {}
  end

  # Prepare for a test run: reset state, load the installed RPM list, detect
  # what kind of host this is, and (on brokers only) load the broker Rails
  # environment.
  def run_setup
    initialize
    load_rpm_list
    detect_system_properties
    return unless @is_broker
    load_broker_rails_env
  end

  # Run the selected diagnostics and return [warning_count, error_count].
  #
  # The tests named in @options[:tests] are run; when none are given, every
  # instance method whose name starts with "prereq_" is run, followed by every
  # one starting with "test_".
  #
  # A test raising SkipTest is skipped quietly. A test raising AbortTests stops
  # the run unless @options[:abortok] is set. Any other StandardError is
  # recorded as a failure of that test.
  def run_tests
    selected = @options[:tests]
    if selected.nil? || selected.empty?
      # prereq_ methods go first so they get the chance to abort the run
      selected = %w[prereq_ test_].map do |prefix|
        self.class.instance_methods.select {|name| name.to_s.start_with? prefix}
      end.flatten
    end
    selected.each do |name|
      begin
        verbose "running: #{name}"
        send name
      rescue SkipTest
        verbose "skipping #{name}"
      rescue AbortTests
        next if @options[:abortok]
        eputs "Aborting tests according to #{name}. To run all tests anyway, use the --abortok option."
        break
      rescue StandardError => err
        do_fail "error running #{name}: #{err.inspect}"
      end
    end
    [@warns, @errors]
  end

  ######## UTILITIES ########

  # Name of the method two frames up the stack, i.e. the method that called
  # whoever called us (do_fail / do_warn use this to name the reporting test).
  #
  # The label is pulled out of the backtrace entry. Ruby before 3.4 prints it
  # as in `name'; Ruby 3.4 and later prints it as in 'Class#name', so both
  # opening quote styles are accepted. Returns nil when there is no such frame
  # or the entry has no recognizable label.
  def called_from
    frame = caller[1] or return nil
    frame[/[`']([^']*)'/, 1]
  end
  # Write msg to stderr in red (ANSI colour 31), followed by a newline.
  def eputs(msg)
    $stderr.write "\e[31m#{msg}\e[0m\n"
  end
  # Write msg to stderr in yellow (ANSI colour 33), followed by a newline.
  def wputs(msg)
    $stderr.write "\e[33m#{msg}\e[0m\n"
  end

  # Print an INFO line to stdout, but only when the :verbose option is set.
  # Returns the (falsy) option value when nothing is printed, otherwise
  # whatever $stdout.write returns.
  def verbose(msg)
    enabled = @options[:verbose]
    return enabled unless enabled
    $stdout.write("INFO: #{msg}\n")
  end

  # Report a failure: print "FAIL: <calling method>" and msg in red on stderr
  # and bump the error counter. Returns the new error count.
  def do_fail(msg)
    # called_from must be invoked directly from here so it sees our caller
    heading = "FAIL: #{called_from}\n"
    eputs(heading + msg)
    @errors += 1
  end

  # Report a warning: print "WARN: <calling method>" and msg in yellow on
  # stderr and bump the warning counter. Returns the new warning count.
  def do_warn(msg)
    # called_from must be invoked directly from here so it sees our caller
    heading = "WARN: #{called_from}\n"
    wputs(heading + msg)
    @warns += 1
  end

  # Control-flow exceptions rescued by run_tests.

  # Raised by a test to say it does not apply on this host.
  class SkipTest < StandardError
  end

  # Raised by a test to say the remaining tests are not worth running.
  class AbortTests < StandardError
  end

  # Bail out of the current test without counting it as a failure.
  def skip_test
    raise SkipTest
  end

  # Bail out of the current test and ask run_tests to stop the whole run.
  def abort_tests
    raise AbortTests
  end

  # Make two string arrays the same length by appending "0" entries to
  # whichever one is shorter. Both arrays are modified in place; returns nil.
  def pad_arrays!(a, b)
    target = [a.length, b.length].max
    [a, b].each do |list|
      (target - list.length).times { list << "0" }
    end
    nil
  end

  # True when an installed RPM is at least need_version-need_release.
  #
  # rpm_hash     - either an entry from @rpms (a Hash with :version and
  #                :release strings) or an RPM name to look up in @rpms.
  # need_version - minimum version, dot-separated (default "").
  # need_release - minimum release, dot-separated (default "").
  #
  # Returns false when given a name that is not in @rpms.
  #
  # This doesn't account for epoch versions. Revise if needed.
  def is_rpm_minimum_version?(rpm_hash, need_version="", need_release="")
    # get hash if we were passed a string
    if rpm_hash.is_a? String
      rpm_hash = @rpms[rpm_hash]
      return false unless rpm_hash
    end
    # split out the version/release strings
    rpm_version = rpm_hash[:version].split '.'
    rpm_release = rpm_hash[:release].split '.'
    req_version = need_version.split '.'
    req_release = need_release.split '.'
    # pad array lengths with "0" entries so components line up by position
    pad_arrays!(req_version, rpm_version)
    pad_arrays!(req_release, rpm_release)
    rpm_parts = rpm_version + rpm_release
    req_parts = req_version + req_release
    # Right-justify every component to a common width so a plain string
    # comparison orders numeric components correctly. The width is at least 10
    # and grows to fit the longest component; a fixed width of 10 would let a
    # longer component shift everything after it out of alignment.
    width = ([10] + (rpm_parts + req_parts).map {|part| part.length}).max
    rpm_normalized = rpm_parts.map {|part| part.rjust(width)}.join
    req_normalized = req_parts.map {|part| part.rjust(width)}.join
    rpm_normalized >= req_normalized
  end

  # Record a failure unless the named RPM is installed at a version of at
  # least need_version-need_release. A satisfied requirement is only
  # mentioned in verbose mode.
  def require_rpm_minimum_version(rpm, need_version="0", need_release="0")
    installed = @rpms[rpm]
    unless installed
      do_fail "required rpm #{rpm} is not installed"
      return
    end

    wanted = "#{need_version}-#{need_release}"
    if is_rpm_minimum_version?(installed, need_version, need_release)
      verbose "rpm #{rpm} installed with at least version #{wanted}"
    else
      found = "#{installed[:version]}-#{installed[:release]}"
      do_fail "rpm #{rpm} installed with version #{found}; need at least #{wanted}"
    end
  end

  # Run an external script outside the Bundler environment, capturing stdout
  # and stderr together. A zero exit status is reported in verbose mode only;
  # a non-zero exit status is recorded as a failure along with the output.
  #
  # script - the command line to run (passed to the shell).
  def run_script(script)
    verbose "running #{script}"
    output = ""
    status = nil
    # Bundler.with_clean_env was renamed to with_unbundled_env; prefer the new
    # name where the installed Bundler provides it, and fall back otherwise.
    cleaner = Bundler.respond_to?(:with_unbundled_env) ? :with_unbundled_env : :with_clean_env
    Bundler.send(cleaner) do
      output = `#{script} 2>&1`
      # grab the exit status right away, before anything else can replace $?
      status = $?
    end
    if status.success?
      verbose "#{script} ran without error:\n--BEGIN OUTPUT--\n#{output}\n--END #{script} OUTPUT--"
    else
      do_fail "#{script} had errors:\n--BEGIN OUTPUT--\n#{output}\n--END #{script} OUTPUT--"
    end
  end

  # True if the shell can find a command named cmd to execute.
  #
  # The check runs through /bin/sh, so the redirection is written in POSIX
  # form ("> /dev/null 2>&1"); the bash-only ">&" form is a syntax error in
  # other shells and would make every lookup fail.
  def executable?(cmd)
    system "command -v #{cmd} > /dev/null 2>&1"
  end

######## SETUP #############

  # Query rpm for every installed package and record each in @rpms, keyed by
  # package name, as a hash of :name, :version, :release and :packager.
  # Returns @rpms.
  def load_rpm_list
    verbose "loading list of installed packages"
    listing = `rpm -qa --qf '%{NAME}|%{VERSION}|%{RELEASE}|%{PACKAGER}\n'`
    listing.split("\n").each do |line|
      name, version, release, packager = line.split('|')
      @rpms[name] = {
        :name => name,
        :version => version,
        :release => release,
        :packager => packager,
      }
    end
    @rpms
  end

  # Work out what kind of host this is from the installed RPMs (@rpms must be
  # loaded first) and a few probes of the system. Sets:
  #
  #   @is_broker, @is_node - whether broker / node packages are installed
  #   @project_is          - :enterprise / :online / :origin flags
  #   @scl_prefix          - "ruby193-" when the ruby193 SCL can be enabled
  #   @use_systemctl       - whether systemctl is available
  #   @os_is               - OS flags derived from /etc/redhat-release
  #
  # These tests probably need a lot of work to be robust.
  def detect_system_properties
    @is_broker = %w[openshift-origin-broker rhc-broker].any? {|name| @rpms[name]}
    verbose "OpenShift broker installed." if @is_broker
    @is_node = %w[rubygem-openshift-origin-node rhc-node].any? {|name| @rpms[name]}
    verbose "OpenShift node installed." if @is_node

    @project_is = {}
    # It's OpenShift Enterprise if there are any el6op RPMs on the system.
    # (to_s guards against an entry whose release field could not be parsed)
    @project_is[:enterprise] = @rpms.values.any? {|rpm| rpm[:release].to_s.include? 'el6op'}
    # It's OpenShift Online if certain RPMs are present
    @project_is[:online] = @rpms['rhc-common'] && true
    # It's OpenShift Origin otherwise...
    @project_is[:origin] = !(@project_is[:enterprise] || @project_is[:online])
    # detect whether the ruby193 SCL is in play. The redirection is written in
    # POSIX form because system() runs the command with /bin/sh.
    @scl_prefix = system("scl enable ruby193 echo > /dev/null 2>&1") ? "ruby193-" : ""
    # detect whether we use service or systemctl
    @use_systemctl = executable? "systemctl"

    # looking for RHEL or Fedora version via /etc/redhat-release.
    # On anything else the file is absent; treat that as "none of the above"
    # rather than crashing during setup.
    release_file = '/etc/redhat-release'
    release = File.exist?(release_file) ? File.read(release_file) : ""
    verbose "#{release_file} not found; OS is neither RHEL nor Fedora as far as we can tell." if release.empty?
    @os_is = {
      # e.g. Fedora release 17 (Beefy Miracle)
      :fedora => release.include?('Fedora'),
      :fedora16 => release.include?('Fedora release 16'),
      :fedora17 => release.include?('Fedora release 17'),
      :fedora18 => release.include?('Fedora release 18'),
      :fedora19 => release.include?('Fedora release 19'),
      # e.g. Red Hat Enterprise Linux Server release 6.3 (Santiago)
      :rhel => release.include?('Enterprise Linux'),
      :rhel6 => release.include?('Enterprise Linux Server release 6'),
      :rhel64 => release.include?('Enterprise Linux Server release 6.4'),
      :rhel7 => release.include?('Enterprise Linux Server release 7'),
    }
  end

  def load_broker_rails_env
    begin
      verbose "Loading the broker rails environment."
      require "#{ENV['OPENSHIFT_BROKER_DIR'] || '/var/www/openshift/broker'}/config/environment"
      # Disable analytics for admin scripts
      Rails.configuration.analytics[:enabled] = false
      Rails.configuration.msg_broker[:rpc_options][:disctimeout] = @options[:wait]
    rescue Interrupt
      raise
    rescue Exception => e
      do_fail <<-"FAIL"
        Broker application failed to load. This is often a gem dependency problem.
        Updating rubygem RPMs and restarting openshift-broker
        to regenerate the broker Gemfile.lock may fix the problem.
        The actual error encountered was:
        #{e.inspect}
        ***
        THIS PROBLEM NEEDS TO BE RESOLVED FOR THE BROKER TO WORK.
        DISABLING BROKER TESTS.
        ***
      FAIL
      @is_broker = false
    end
  end

######## TESTS #############
  #
  # Methods beginning with prereq_ or test_ will be run automatically.
  # prereq_ methods run before test_ methods in order to enable aborting testing
  # if there are serious problems.

  def have_host_command
    return true if executable? "host"
    do_warn <<-WARN unless @tested_host_command
       The "host" command is not available. This command is not required; however,
       you may install it, by installing the "bind-utils" RPM, to enable further
       diagnostics checking.
    WARN
    @tested_host_command = true
    false
  end

  # Prerequisite: the first nameserver listed in /etc/resolv.conf answers DNS
  # queries.
  #
  # Fails and aborts the test run when no nameserver is configured, or when a
  # query sent straight to that server times out. Skipped (after the
  # no-nameserver check) when the "host" command is not installed.
  def prereq_dns_server_available
    verbose "checking that the first server in /etc/resolv.conf responds"
    server = nil
    # File.foreach closes the file for us, even when we break out early
    File.foreach('/etc/resolv.conf') do |line|
      next unless line =~ /^\s*nameserver\s+(\S+)/
      server = $1
      break
    end
    if server.nil?
      do_fail <<-"FAIL"
        Your /etc/resolv.conf does not define any nameservers.
        Not much is going to work without DNS resolution.
      FAIL
      abort_tests
    end
    # just looking for a nameserver response. at this point we
    # don't much care if resolution is correct; what we want
    # is to avoid running a bunch more tests when the nameserver
    # is obviously broken.
    skip_test unless have_host_command
    command = "host -W 1 example.com. #{server}"
    # IPSocket.getaddress goes through /etc/hosts;
    # "host" uses DNS which is what we want.
    output = `#{command} 2>&1`
    if output =~ /connection timed out/
      do_fail <<-"FAIL"
        #{server} doesn't appear to respond to DNS requests.
        This command:
          #{command}
        should have connected to your primary nameserver.
        Instead, it returned:
          #{output}
        Please check the following to resolve this issue:
        * Does /etc/resolv.conf have your correct nameserver?
        * Is your nameserver running?
        * Is the firewall on your nameserver open (udp:53)?
        * Can you connect to your nameserver?
        Many OpenShift functions fail without working DNS resolution.
      FAIL
      abort_tests
    end
  end

  # Prerequisite: the application domain has nameservers according to DNS.
  #
  # Only applies on broker or node hosts with the "host" command installed.
  # The domain is read from the broker's Rails configuration on a broker, and
  # from CLOUD_DOMAIN in the OpenShift config otherwise.
  def prereq_domain_resolves
    skip_test unless @is_broker || @is_node
    skip_test unless have_host_command

    verbose "checking that we can resolve our application domain"

    domain = if @is_broker
               Rails.configuration.openshift[:domain_suffix]
             else
               OpenShift::Config.new.get('CLOUD_DOMAIN')
             end
    # Ask for the domain's NS records. IPSocket.getaddress would consult
    # /etc/hosts; "host" goes to DNS, which is the thing being checked.
    command = "host -W 5 -t NS '#{domain}'"
    output = `#{command} 2>&1`
    return if output =~ /name server/

    do_fail <<-"FAIL"
        Application domain does not appear to resolve under
        current nameserver configuration. This command:
          #{command}
        should have returned the nameserver(s) for #{domain}.
        Instead, it returned:
          #{output}
        Please check the following to resolve this issue:
        * Is CLOUD_DOMAIN=#{domain} in #{@is_broker ? 'broker':'node'}.conf correct?
        * Does /etc/resolv.conf have the right nameserver(s)?
        * Is your OpenShift domain nameserver running?
        * Is the firewall on your nameserver open (udp:53)?
        * Does your nameserver respond to queries via dig/host?
        Many OpenShift functions may fail without application DNS.
    FAIL
  end

# Dependencies that OpenShift Enterprise needs but that are built elsewhere:
# they are not expected to carry an OpenShift Enterprise release tag.
# Frozen so the list cannot be modified by accident.
ENTERPRISE_RPMS = %w[ atlas-devel blas-devel gd-devel
 ghostscript-devel ImageMagick-devel ImageMagick-perl jasper-devel
 lapack-devel lcms-devel libc-client libcgroup-pam maven3 numpy-f2py
 perl-Class-Accessor perl-Class-Data-Inheritable perl-Class-Trigger
 perl-Clone perl-IO-stringy perl-JSON perl-YAML php-bcmath php-devel
 php-imap php-pecl-imagick php-process ruby193-rubygem-ZenTest
 ruby193-rubygem-bigdecimal ruby193-rubygem-io-console
 ruby193-rubygem-json_pure ruby193-rubygem-rake ruby193-rubygem-rdoc
 ruby193-rubygem-rspec rubygem-rake uuid-pgsql xerces-c
 yum-plugin-priorities
].freeze
# RPMs that should come from OpenShift Enterprise specifically; installed
# copies are expected to have 'el6op' in their release.
# Frozen so the list cannot be modified by accident.
ENTERPRISE_OPENSHIFT_RPMS = %w[ activemq activemq-client armadillo
 cfitsio CharLS freexl gdal gdal-libs geos geos-devel gpsbabel haproxy
 haproxy15side hdf5 jboss-eap6-index jboss-eap6-modules jenkins
 jenkins-plugin-openshift js js-devel libc-client-devel libdap libev
 libev-devel libgeotiff libgta libmcrypt libmcrypt-devel libmongodb
 libmongodb-devel libspatialite libwebp mod_passenger mongodb
 mongodb-devel mongodb-server netcdf nodejs010-nodejs-bignumber.js
 nodejs010-nodejs-bson nodejs010-nodejs-buffer-crc32
 nodejs010-nodejs-bytes nodejs010-nodejs-colors
 nodejs010-nodejs-commander nodejs010-nodejs-connect
 nodejs010-nodejs-cookie nodejs010-nodejs-cookie-signature
 nodejs010-nodejs-debug nodejs010-nodejs-express
 nodejs010-nodejs-formidable nodejs010-nodejs-fresh
 nodejs010-nodejs-generic-pool nodejs010-nodejs-keypress
 nodejs010-nodejs-methods nodejs010-nodejs-mongodb
 nodejs010-nodejs-mysql nodejs010-nodejs-node-static
 nodejs010-nodejs-optimist nodejs010-nodejs-options
 nodejs010-nodejs-pause nodejs010-nodejs-pg
 nodejs010-nodejs-range-parser nodejs010-nodejs-require-all
 nodejs010-nodejs-send nodejs010-nodejs-supervisor
 nodejs010-nodejs-tinycolor nodejs010-nodejs-wordwrap
 nodejs010-nodejs-ws openshift-enterprise-release
 openshift-enterprise-upgrade-broker openshift-enterprise-upgrade-node
 openshift-enterprise-yum-validator openshift-origin-broker
 openshift-origin-broker-util openshift-origin-cartridge-cron
 openshift-origin-cartridge-diy openshift-origin-cartridge-haproxy
 openshift-origin-cartridge-jbosseap
 openshift-origin-cartridge-jbossews openshift-origin-cartridge-jenkins
 openshift-origin-cartridge-jenkins-client
 openshift-origin-cartridge-mock openshift-origin-cartridge-mock-plugin
 openshift-origin-cartridge-mysql openshift-origin-cartridge-nodejs
 openshift-origin-cartridge-perl openshift-origin-cartridge-php
 openshift-origin-cartridge-postgresql
 openshift-origin-cartridge-python openshift-origin-cartridge-ruby
 openshift-origin-console openshift-origin-msg-common
 openshift-origin-msg-node-mcollective openshift-origin-node-proxy
 openshift-origin-logshifter openshift-origin-node-util
 openshift-origin-port-proxy openshift-origin-util-scl pam_openshift
 perl-App-cpanminus perl-Class-DBI perl-Class-DBI-Pg
 perl-Class-Factory-Util perl-DateTime-Format-Builder
 perl-DateTime-Format-Pg perl-DateTime-Format-Strptime
 perl-DBIx-ContextualFetch perl-Ima-DBI perl-UNIVERSAL-moniker
 php-mcrypt php-pear-MDB2 php-pear-MDB2-Driver-pgsql php-pecl-xdebug
 postgis postgis-docs postgresql92-pgRouting postgresql92-postgis
 postgresql-ip4r proj proj-devel proj-nad python27-mod_wsgi
 python27-numpy python27-numpy-f2py python27-python-pip-virtualenv
 python-virtualenv ruby193-facter ruby193-js ruby193-js-devel
 ruby193-mcollective ruby193-mcollective-client
 ruby193-mcollective-common ruby193-mod_passenger ruby193-rubygem-bson
 ruby193-rubygem-bson_ext ruby193-rubygem-chunky_png
 ruby193-rubygem-commander ruby193-rubygem-compass
 ruby193-rubygem-compass-rails ruby193-rubygem-daemon_controller
 ruby193-rubygem-daemons ruby193-rubygem-dnsruby
 ruby193-rubygem-fastthread ruby193-rubygem-file-tail
 ruby193-rubygem-formtastic ruby193-rubygem-fssm ruby193-rubygem-haml
 ruby193-rubygem-highline ruby193-rubygem-json ruby193-rubygem-minitest
 ruby193-rubygem-mongo ruby193-rubygem-mongoid ruby193-rubygem-moped
 ruby193-rubygem-open4 ruby193-rubygem-origin
 ruby193-rubygem-parseconfig ruby193-rubygem-passenger
 ruby193-rubygem-passenger-devel ruby193-rubygem-passenger-native
 ruby193-rubygem-passenger-native-libs ruby193-rubygem-pg
 ruby193-rubygem-rdiscount ruby193-rubygem-regin
 ruby193-rubygem-rest-client ruby193-rubygem-ruby2ruby
 ruby193-rubygem-ruby_parser ruby193-rubygem-safe_yaml
 ruby193-rubygem-sass-twitter-bootstrap ruby193-rubygem-sexp_processor
 ruby193-rubygem-spruz ruby193-rubygem-state_machine
 ruby193-rubygem-stomp ruby193-rubygem-syslog-logger
 ruby193-rubygem-systemu ruby193-rubygem-term-ansicolor
 ruby193-rubygem-xml-simple ruby193-ruby-mysql ruby193-ruby-selinux
 ruby193-ruby-wrapper rubygem-bson rubygem-bson_ext rubygem-builder
 rubygem-bundler rubygem-diff-lcs rubygem-fastthread rubygem-file-tail
 rubygem-mime-types rubygem-nokogiri rubygem-open4
 rubygem-openshift-origin-admin-console
 rubygem-openshift-origin-auth-remote-user
 rubygem-openshift-origin-common rubygem-openshift-origin-console
 rubygem-openshift-origin-container-selinux
 rubygem-openshift-origin-controller
 rubygem-openshift-origin-dns-nsupdate
 rubygem-openshift-origin-frontend-apachedb
 rubygem-openshift-origin-frontend-apache-mod-rewrite
 rubygem-openshift-origin-frontend-apache-vhost
 rubygem-openshift-origin-frontend-haproxy-sni-proxy
 rubygem-openshift-origin-frontend-nodejs-websocket
 rubygem-openshift-origin-msg-broker-mcollective
 rubygem-openshift-origin-node
 rubygem-openshift-origin-routing-activemq rubygem-ParseTree
 rubygem-passenger rubygem-passenger-devel rubygem-passenger-native
 rubygem-passenger-native-libs rubygem-rack rubygem-ruby2ruby
 rubygem-RubyInline rubygem-ruby_parser rubygems rubygem-sexp_processor
 rubygem-spruz rubygem-sqlite3 rubygem-thor rubygem-thread-dump
 rubygem-ZenTest ruby-mysql ruby-nokogiri ruby-RMagick ruby-sqlite3
 shapelib uuid-devel v8 v8-devel
].freeze

  def test_enterprise_rpms
    skip_test unless @project_is[:enterprise]
    verbose "Checking that all OpenShift RPMs are actually from OpenShift Enterprise"
    rogue_rpms = []
    (ENTERPRISE_OPENSHIFT_RPMS + ENTERPRISE_RPMS).uniq.each do |rpm|
      if @rpms.has_key?(rpm)
        rel = @rpms[rpm][:release]
        ver = @rpms[rpm][:version]
        pkgr = @rpms[rpm][:packager]
        rogue_rpms << "#{rpm} should be an OpenShift Enterprise RPM but installed version #{ver}-#{rel} does not have 'el6op' in it" if ENTERPRISE_OPENSHIFT_RPMS.include?(rpm) && !rel.include?('el6op')
        rogue_rpms << "#{rpm} should be packaged by Red Hat but installed version #{ver}-#{rel} is packaged by '#{pkgr}'" unless pkgr.match /Red Hat/
      end
    end
    rogue_rpms.empty? or do_fail <<-ROGUES
      The following problems were found with your RPMs:
      \n\t#{ rogue_rpms.join("\n\t") }

      Please ensure that you have not enabled EPEL or other third-party repositories, and
      do not have any of these RPMs pre-installed in your install image. These RPMs must come
      from your OpenShift Enterprise subscription in order to be supported.
    ROGUES
  end


  # On RHEL 6, require selinux-policy 3.7.19-195.el6_4.4 or later.
  # Does nothing on other operating systems.
  def test_selinux_policy_rpm
    return unless @os_is[:rhel6]
    require_rpm_minimum_version('selinux-policy', "3.7.19", "195.el6_4.4")
  end

  # Check the SELinux mode reported by getenforce.
  #
  #   disabled   - failure
  #   permissive - warning
  #   enforcing  - dry-run restorecon over the OpenShift-related directories
  #                and warn about any contexts it would change; then, if the
  #                audit package is installed, warn about the latest AVC
  #                entries in the audit log
  #   other      - failure (unexpected getenforce output)
  def test_selinux_enabled
    enforce = `getenforce`.chomp.downcase
    if enforce == 'disabled'
      do_fail <<-DISABLED
      SELinux is DISABLED according to getenforce.
      OpenShift will break in various ways without SELinux.
      You need to at least enable SELinux, even if it is just in permissive mode.
      To enable SELinux, edit /etc/selinux/config and set SELINUX=permissive
      If this system was installed with SELinux disabled, you will probably have
      broken contexts and be unable to run in enforcing mode. A reboot after enabling
      SELinux should automatically relabel all contexts to fix this problem.
      DISABLED
    elsif enforce == 'permissive'
      do_warn <<-PERMISSIVE
      SELinux is in permissive mode according to getenforce.
      This means you are not getting any security benefit from SELinux.
      If possible, edit /etc/selinux/config and set SELINUX=enforcing
      and also run "setenforce 1" to enable enforcing mode.
      PERMISSIVE
    elsif enforce == 'enforcing'
      # would like to test that major contexts are correct;
      # "restorecon -n" only reports what it would change
      broken = ""
      broken << `restorecon -Rnv /var/www/openshift/` if @is_broker
      broken << `restorecon -Rnv /usr/libexec/openshift/` if @is_node
      broken << `restorecon -Rnv /etc/openshift` if @is_broker || @is_node
      broken << `restorecon -Rnv /opt/rh/ruby193/root/etc` unless @scl_prefix.empty?
      # a full report on /etc includes irrelevant / spurious entries,
      # so only complain about the ones with common bad contexts
      broken << `restorecon -Rnv /etc | grep -e :user_home_t: -e :user_tmp_t: -e :admin_home_t:`

      unless broken.empty?
        do_warn <<-CONTEXT
      The following changes should be made to SELinux file contexts. Incorrect contexts
      are likely to cause problems with OpenShift operations. Please review and apply
      the "restorecon <file>" command to fix each as indicated. \n#{broken}
        CONTEXT
      end

      audit_installed = @rpms['audit']
      if audit_installed
        recent_avcs = `grep AVC /var/log/audit/audit.log | tail -5`
        if recent_avcs.empty?
          verbose "No recent SELinux AVCs logged. However, SELinux violations are not always logged."
        else
          do_warn <<-AVCS
            The following recent entries in /var/log/audit/audit.log likely indicate problems:
            \n#{recent_avcs}
            Note that SELinux violations are not always logged in the audit log.
          AVCS
        end
      else
        verbose "Please install the 'audit' package to enable searching for recent AVCs."
      end
    else
      do_fail "Received unexpected status from getenforce: #{enforce}"
    end
  end

  # On brokers, fail if anything in the broker application cache is owned by
  # root.
  def test_broker_cache_permissions
    skip_test unless @is_broker
    root_owned = `find /var/www/openshift/broker/tmp/cache/* -user root 2> /dev/null`
    if root_owned.length == 0
      verbose "broker application cache permissions appear fine"
    else
      do_fail <<-PERMS
        Broker application cache contains files belonging to root. This will probably
        result in cache items that can't expire. Please clear the cache by executing:
            # oo-admin-broker-cache --clear
      PERMS
    end
  end

  # On brokers, cross-check three views of gear profiles:
  #
  #   * the broker configuration (valid sizes, default size, default
  #     capabilities),
  #   * the profile each node host reports via MCollective, and
  #   * the districts recorded in the database and the node hosts in them.
  #
  # Inconsistencies are reported as failures or warnings. The test stops
  # early (skip_test) when no node hosts respond or no districts are defined;
  # missing districts are a failure when the broker requires districts for
  # app creation and a warning otherwise.
  def test_node_profiles_districts_from_broker
    skip_test unless @is_broker

    conf = "/etc/openshift/broker#{Rails.env.development? ? '-dev': ''}.conf"
    conf_profiles = Rails.configuration.openshift[:gear_sizes]
    default_profile = Rails.configuration.openshift[:default_gear_size]
    default_allowed = Rails.configuration.openshift[:default_gear_capabilities]
    districts_required = Rails.configuration.msg_broker[:districts][:require_for_app_create]
    #
    # get the gear profile from every node
    #
    # a_ for the records according to actual nodes that respond
    verbose "checking node profiles via MCollective"
    a_profile_for = Hash.new
    a_nodes_with_profile = Hash.new {|h,k| h[k] = []}
    OpenShift::MCollectiveApplicationContainerProxy.rpc_get_fact("node_profile") do |node,profile|
      # to_s so a missing (nil) fact is reported below instead of crashing
      if profile.to_s.empty?
        do_fail <<-FAIL
          Host #{node} does not have a profile defined.
          This is a serious problem and will prevent it from hosting gears.
          * Is the facter running on the host and updating the mcollective facts.yaml?
          * What profile is specified in the host's /etc/openshift/resource_limits.conf?
        FAIL
      else
        verbose "profile for #{node}: #{profile}"
        a_profile_for[node] = profile
        if profile.include? ","  # trying to specify multiple profiles
          do_fail <<-FAIL
            Host #{node} has specified a profile of "#{profile}" which is invalid.
            A node host can only have a single profile. To provide another profile,
            create another node host with that profile.
          FAIL
        else
          a_nodes_with_profile[profile] << node
        end
      end
    end

    # check that gear profiles from broker.conf match actual available node profiles;
    # also, validate the broker.conf settings make sense
    if conf_profiles.empty?
      do_fail "No gear sizes configured; please fix VALID_GEAR_SIZES in #{conf}"
    else
      do_fail <<-MISSING unless conf_profiles.include? default_profile
        Default gear profile is not included in valid gear sizes
          "#{default_profile}" is not in #{conf_profiles.join ", "}
        Attempts to create apps without specifying gear size will fail.
        Please fix the settings in #{conf}
      MISSING
      if default_allowed # early versions didn't define this, skip if not defined
        missing_profiles = default_allowed - conf_profiles
        do_fail <<-MISSING unless missing_profiles.empty?
          The following gear profile(s) are available to users by default
          (DEFAULT_GEAR_CAPABILITIES), but not valid (VALID_GEAR_SIZES):
            #{missing_profiles.join ", "}
          Attempts to create apps using these gears will fail.
          Please fix the settings in #{conf}
        MISSING
      end
      if a_profile_for.empty?
        do_fail <<-NONE
          No node hosts found. Please install some,
          or ensure the existing ones respond to 'oo-mco ping'.
          OpenShift cannot host gears without at least one node host responding.
        NONE
        skip_test
      end
      missing_profiles = conf_profiles - a_nodes_with_profile.keys
      do_warn <<-MISSING if missing_profiles.any?
        The following gear profile(s) are configured but not provided by any node hosts:
          #{missing_profiles.join ", "}
        Attempts to create apps using these gear profiles will fail.
        Please fix the settings in #{conf} or add node hosts accordingly.
      MISSING
      missing_profiles = a_nodes_with_profile.keys - conf_profiles
      do_warn <<-MISSING if missing_profiles.any?
        The following gear profile(s):
          #{missing_profiles.join ", "}
        are available on at least one node host, but not configured for the broker.
        Please fix the VALID_GEAR_SIZES setting in #{conf}
        or remove / reconfigure the relevant node host(s).
      MISSING
    end

    #
    # get database's list of districts and the nodes within
    #
    districts = District.find_all
    # Only complain (and stop) when there really are no districts; with
    # districts defined, carry on to the consistency checks whether or not
    # the broker requires them.
    if districts.length == 0
      if districts_required # and missing, that's a fail
        do_fail <<-NONE
          No districts are defined. Districts are required before creating applications.
          Please consult the Administration Guide.
        NONE
      else # but if districts are not required, only a warning
        do_warn <<-NONE
        No districts are defined. Districts should be used in any production installation.
        Please consult the Administration Guide.
        NONE
      end
      skip_test
    end

    verbose "checking that node profiles and districts are consistent"

    # d_ for the records according to districts
    d_profile_for = Hash.new
    d_district_for = Hash.new
    d_profile_active = Hash.new
    d_nodes_with_profile = Hash.new {|h,k| h[k] = []}
    districts.each do |district|
      profile = district.attributes.has_key?('node_profile') ? district.node_profile : district.gear_size
                            # model refactor renamed :node_profile => :gear_size
      verbose "district '#{district.name}' has profile '#{profile}'"
      if district.servers.empty?
        do_warn "\tThere are no node hosts in district '#{district.name}'"
      end
      district.servers.each do |node|
        d_profile_for[node["name"]] = profile
        d_district_for[node["name"]] = district.name
        d_profile_active[profile] = true if node["active"]
        d_nodes_with_profile[profile] << node["name"]
        verbose "  host #{node['name']} is #{node['active']? 'active' : 'inactive'} in district '#{district.name}'"
      end
    end

    # check that gear profiles from broker.conf are matched by district profiles
    unless conf_profiles.empty?
      do_fail <<-MISSING unless d_profile_active[default_profile]
        Default gear profile '#{default_profile}' has no active node hosts supplying it in any district.
        Attempts to create apps without specifying gear size #{districts_required ? "will" : "may"} fail.
        Please add active node hosts to a district with profile '#{default_profile}'
        using oo-admin-ctl-district or fix the settings in #{conf}
      MISSING
      missing_profiles = conf_profiles - d_profile_active.keys
      do_fail <<-MISSING unless missing_profiles.empty?
        The following gear profile(s) are configured:
          #{missing_profiles.join ", "}
        but not provided by any active district hosts.
        Attempts to create apps using these gears #{districts_required ? "will" : "may"} fail.
        Please add districts / node hosts with oo-admin-ctl-district
        or fix the settings in #{conf}
      MISSING
      missing_profiles = d_profile_active.keys - conf_profiles
      do_warn <<-MISSING unless missing_profiles.empty?
        The following gear profile(s) are available from at least one active district host:
          #{missing_profiles.join ", "}
        but not configured in broker.conf, so not usable.
        Please fix the VALID_GEAR_SIZES setting in #{conf}
      MISSING
    end

    # check for consistency between district definitions and actual nodes
    a_profile_for.each do |node,profile|
      # check that all nodes are in a district
      if !d_profile_for[node]
        do_warn <<-NODIST 
          Node host #{node} with profile '#{profile}' is not a member of any district.
          Please add it to a district with oo-admin-ctl-district.
        NODIST
      # check that nodes have same profiles in district definition as in actual node hosts
      elsif d_profile_for[node] != profile
        do_fail <<-WRONG
          Node host #{node} has profile '#{profile}'
          but is in district '#{d_district_for[node]}' with profile '#{d_profile_for[node]}'
          Did you change the node profile after adding it to a district?
        WRONG
      end
    end
    d_profile_for.each do |node,profile|
      # check that no nodes from districts are missing
      do_fail <<-MISSING if !a_profile_for[node]
        Node host #{node} is a member of district '#{d_district_for[node]}' but cannot be found.
        If the host exists, it is not responding via MCollective.
        If it should not be in the district, please remove it with oo-admin-ctl-district.
      MISSING
    end
  end

  # On brokers, run the broker acceptance scripts that are available.
  #
  # OpenShift Online hosts run rhc-accept-devenv or rhc-accept-broker,
  # plus oo-accept-systems when the broker-util RPM is installed. Other hosts
  # need the broker-util RPM for oo-accept-broker and oo-accept-systems, and
  # get a warning without it.
  def test_broker_accept_scripts
    skip_test unless @is_broker
    util_rpm = "openshift-origin-broker-util"
    if @project_is[:online]
      accept_script = @rpms['rhc-devenv'] ? "rhc-accept-devenv" : "rhc-accept-broker"
      run_script(accept_script)
      run_script("oo-accept-systems -w #{@options[:wait]}") if @rpms[util_rpm]
    elsif @rpms[util_rpm]
      run_script("oo-accept-broker")
      run_script("oo-accept-systems -w #{@options[:wait]}")
    else
      do_warn "openshift-origin-broker-util is not installed; you really should install it"
    end
  end

  # On node hosts, run oo-accept-node when the node-util RPM is installed;
  # warn when it is not.
  def test_node_accept_scripts
    skip_test unless @is_node
    unless @rpms["openshift-origin-node-util"]
      return do_warn("openshift-origin-node-util is not installed; you really should install it")
    end
    run_script("oo-accept-node")
  end

  # Grep tmpfile for pattern and, in verbose mode, mention any matching lines
  # as messages that are normal to see for now. Nothing is said when there is
  # no match.
  def expect_log_error(pattern, tmpfile)
    matches = `grep '#{pattern}' #{tmpfile}`
    return if matches.empty?
    verbose "broker error_log: this message is normal for now:\n  #{matches}"
  end

  # Scans the broker's httpd error_log for known Passenger and MCollective
  # client logging problems.
  #
  # Only the unique log lines since the most recent "resuming normal
  # operations" message are examined (the whole log if that message never
  # appears). Skipped on non-broker hosts and when the log is missing or empty.
  def test_broker_httpd_error_log
    skip_test unless @is_broker
    logfile = '/var/log/openshift/broker/httpd/error_log'
    unless File.exist? logfile # log file just not there
      # two statements on purpose: chaining these with "and" would only skip
      # when verbose happened to return a truthy value
      verbose "no file #{logfile}"
      skip_test
    end
    #
    # create a tmp file with all of the unique error log statements after the most recent openshift-broker start
    #
    # The Tempfile object stays referenced for the whole method so that garbage
    # collection cannot unlink the file while the shell commands are using it.
    tmp = Tempfile.new("oodiag-log-#{$$}")
    tmp.close
    tmpfile = tmp.path
    system %Q[sed -n 'H; /configured -- resuming normal operations/h; ${g;p;}' #{logfile} | sort -u > #{tmpfile}]
    # if that produced nothing, just use the whole thing
    system %Q[sort -u #{logfile} > #{tmpfile}] unless File.size? tmpfile
    unless File.size? tmpfile
      verbose "log #{logfile} is empty"
      skip_test
    end
    #
    # info about Passenger prespawn errors
    expect_log_error 'Cannot execute.*prespawn.*Permission denied (13)', tmpfile
    #
    # look for real Passenger startup problems
    out = `grep 'Passenger could not be initialized\\|Unable to start the Phusion Passenger watchdog' #{tmpfile}`
    do_fail <<-"ERR" unless out.empty?
      broker error_log: serious problem(s) with Passenger startup:
        #{out}
        The broker is probably not running correctly. rhc tool output may be confusing.
        Make sure selinux-policy is updated and check the Troubleshooting Guide for tips.
    ERR
    #
    # look for Passenger errors during operation
    out = `grep 'Unexpected error in mod_passenger' #{tmpfile}`
    do_fail <<-ERR unless out.empty?
      broker error_log: serious problem with Passenger operation:
        #{out.split("\n").first}
    ERR
    #
    # info about mcollective client log error
    out = `grep 'Could not start logger: Errno::EACCES' #{tmpfile}`
    if ! out.empty?
      # use the log path named in the error when there is one
      logfile = out[/Errno::EACCES[^\/]+(\/\S+.log)/, 1] || "/var/log/mcollective-client.log"
      do_warn <<-FIXLOG
        broker error_log: this problem indicates mcollective client logging is failing:
          #{out}
        Please fix this issue with the following commands:
          # chown apache:root #{logfile}
          # service openshift-broker restart
        Alternatively, consider the following logging settings in MCollective client.cfg:
          loglevel = warn        # only log notable problems
          logger_type = console  # stdout or broker httpd error_log
      FIXLOG
    end
  ensure
    # done with the tmp file, whichever way the method is left
    tmp.close! if tmp
  end

  # Checks that the broker httpd process tree contains the expected Passenger
  # processes.
  #
  # Runs pstree on the pid recorded in the broker httpd pid file. Fails if
  # "httpd" does not appear in the output at all; otherwise fails naming the
  # first of PassengerWatchd, PassengerHelper, ruby and PassengerLoggin that is
  # absent, and shows an example of a healthy tree. Skipped on non-broker hosts.
  def test_broker_passenger_ps
    skip_test unless @is_broker
    verbose "checking the broker application process tree"
    cmd = 'pstree -A `cat /var/www/openshift/broker/httpd/run/httpd.pid`'
    out = `#{cmd}`
    example = <<-"PSEX"
      Output from: #{cmd}
      #{out}
      Should look similar to this:
      httpd-+-PassengerWatchd-+-PassengerHelper-+-ruby---{ruby}
            |                 |                 `-6*[{PassengerHelpe}]
            |                 |-PassengerLoggin---{PassengerLoggi}
            |                 `-3*[{PassengerWatch}]
            `-8*[httpd]
    PSEX
    return do_fail "Broker httpd doesn't seem to be running at all. Try 'service openshift-broker start'" unless out.include? "httpd"
    # Report only the first missing process. A "return" inside the block would
    # leave the whole method as soon as the first process was found present.
    missing = %w{PassengerWatchd PassengerHelper ruby PassengerLoggin}.find {|it| !out.include?(it)}
    do_fail "pstree for broker is missing #{missing}. Make sure selinux-policy RPM is up to date and check Troubleshooting guide. \n#{example}" if missing
  end

  # Warns about rubygems whose gemspec files are not owned by any RPM.
  #
  # Lists the *.gemspec files under the "specifications" directory of every
  # existing gem path and asks rpm which package owns each one. Skipped for
  # the origin project.
  def test_for_nonrpm_rubygems
    skip_test if @project_is[:origin] # origin kind of relies on gem install
    verbose "checking for presence of gem-installed rubygems"

    # list out all gem directories and find those without RPM ownership
    gemdirs = `gem environment gempath`.chomp.split(':').
      map {|dir| dir + "/specifications"}.
      select {|dir| File.exist? dir}.
      map {|dir| dir + "/*.gemspec"}
    # with nothing to list, a bare "ls" would show the current directory and
    # every file in it would be reported as an unowned gem
    return if gemdirs.empty?
    verbose "looking in #{gemdirs.join ' '}"
    disown = `ls #{gemdirs.join ' '} | xargs -n 1 rpm -qf`.
      split("\n").select {|line| line.end_with? "not owned by any package"}
    return if disown.empty?

    do_warn <<-"NOOO"
      The following lines indicate rubygems that were installed from outside
      sources rather than via yum/RPM. These are unsupported and likely to
      break OpenShift. If you see multiple versions of any gems with
      'gem list' you have likely overridden OpenShift-installed gems.
      Uninstall as necessary with 'gem uninstall <gemname> -v <version>'.
        \n#{disown.join("\n")}
    NOOO
  end

  # Warns about every rubygem that 'gem list' shows with more than one
  # installed version (any line of its output containing a comma).
  def test_for_multiple_gem_versions
    # not sure if other projects will want this. uncomment to silence:
    # skip_test unless @project_is[:enterprise]
    verbose "checking for rubygems installed with multiple versions"
    multiples = `gem list`.split("\n").select {|line| line.include? ','}
    return if multiples.empty?

    do_warn <<-"NOOO"
      'gem list' indicates the following rubygems are installed with multiple
      versions. OpenShift gems should be installed via yum/RPM.
      If you have overridden OpenShift-installed gems, expect problems.
      Uninstall as necessary with 'gem uninstall <gemname> -v <version>'.
        \n#{multiples.join("\n")}
    NOOO
  end

  # Scans the node's httpd error_log for a known, currently expected proxy
  # message and mentions it at verbose level.
  #
  # Only the unique log lines since the most recent "resuming normal
  # operations" message are examined (the whole log if that message never
  # appears). Skipped on non-node hosts and when the log is missing or empty.
  def test_node_httpd_error_log
    # this inspects the node's own httpd log, so it is gated on the node role
    skip_test unless @is_node
    logfile = '/var/log/httpd/error_log'
    unless File.exist? logfile # log file just not there
      # two statements on purpose: chaining these with "and" would only skip
      # when verbose happened to return a truthy value
      verbose "no file #{logfile}"
      skip_test
    end
    #
    # create a tmp file with all of the unique error log statements after the most recent httpd start
    #
    # The Tempfile object stays referenced for the whole method so that garbage
    # collection cannot unlink the file while the shell commands are using it.
    tmp = Tempfile.new("oodiag-node-#{$$}")
    tmp.close
    tmpfile = tmp.path
    system %Q[sed -n 'H; /configured -- resuming normal operations/h; ${g;p;}' #{logfile} | sort -u > #{tmpfile}]
    # if that produced nothing, just use the whole thing
    system %Q[sort -u #{logfile} > #{tmpfile}] unless File.size? tmpfile
    unless File.size? tmpfile
      verbose "log #{logfile} is empty"
      skip_test
    end
    #
    # look for proxy lb error
    out = `grep 'proxy: ap_get_scoreboard_lb.*failed in child .* for worker' #{tmpfile}`
    verbose <<-ERR unless out.empty?
      node httpd error_log: this log message is expected for now:
        #{out.split("\n").first}
      For details see: https://bugzilla.redhat.com/show_bug.cgi?id=892871
    ERR
  ensure
    # done with the tmp file, whichever way the method is left
    tmp.close! if tmp
  end

  # Fails when the node configuration's OPENSHIFT_NODE_PLUGINS list still
  # names the unix_user_observer plug-in. Skipped on non-node hosts.
  def test_node_containerization_plugin
    skip_test unless @is_node

    plugins = OpenShift::Config.new.get('OPENSHIFT_NODE_PLUGINS')

    # any error while examining the setting (e.g. it is not set at all) is
    # treated the same as the plug-in not being listed
    observer = begin
      plugins.split(',').include? 'openshift-origin-node/plugins/unix_user_observer'
    rescue
      false
    end

    # In OpenShift Origin, containerization is provided by the
    # CONTAINERIZATION_PLUGIN plug-in, or by the
    # openshift-origin-container-selinux plug-in if none is specified,
    # and the node runtime will verify that it can load the
    # containerization plug-in, so we do not need to check
    # CONTAINERIZATION_PLUGIN here.  However, we do need to ensure
    # that the unix_user_observer node plug-in is not loaded.
    return unless observer
    do_fail <<-"PLUGIN"
      The OPENSHIFT_NODE_PLUGINS setting in the node configuration file must NOT
      include the openshift-origin-node/plugins/unix_user_observer plug-in,
      which is superseded by the containerization plug-in, specified by the
      CONTAINERIZATION_PLUGIN setting.
    PLUGIN
  end

  # Returns true when a VMware tools RPM is in the loaded RPM list or when
  # one of the VMware tools commands is reported executable; false otherwise.
  def vmware_tools_installed?
    rpm_found = %w[VMwareTools vmware-tools].any? {|pkg| @rpms[pkg]}
    rpm_found || %w[vmware-toolbox vmware-toolbox-cmd vmware-config-tools.pl].any? {|tool| executable? tool}
  end

  # Looks in the node's mcollective log for the "message too old" warning that
  # indicates clock skew between node and broker, and fails if it is present.
  #
  # The log is looked for in /var/log/openshift/node and then /var/log. Only
  # unique lines since the most recent "started logging" message are examined
  # (the whole log if that message never appears). Skipped on non-node hosts
  # and when no log file exists.
  def test_node_mco_log
    skip_test unless @is_node
    basename = "#{@scl_prefix}mcollective.log"
    logfile = [ "/var/log/openshift/node/#{basename}", "/var/log/#{basename}" ].find {|f| File.exist? f}
    unless logfile
      # without a file argument sed would sit reading standard input
      verbose "no #{basename} found in /var/log/openshift/node or /var/log"
      skip_test
    end
    #
    # create a tmp file with all of the unique log statements after most recent mcollective start
    #
    # The Tempfile object stays referenced for the whole method so that garbage
    # collection cannot unlink the file while the shell commands are using it.
    tmp = Tempfile.new("oodiag-mco-#{$$}")
    tmp.close
    tmpfile = tmp.path
    # backslashes are doubled so that sed, not Ruby's string parser, gets \w \[ \]
    system %Q! sed -n 'H; /INFO.*The Marionette Collective.*started logging/h; ${g;p;}' #{logfile} | sed 's|^\\w, \\[[^]]*\\]||' | sort -u > #{tmpfile} !
    # if that produced nothing, just use the whole thing
    system %Q! sort -u #{logfile} > #{tmpfile} ! unless File.size? tmpfile
    return unless File.size? tmpfile
    #
    # Look for the mco timeout warning
    out = `grep 'WARN.*created at [0-9]* is [0-9]* seconds old, TTL is' #{tmpfile}`
    unless out.empty?
      errors = vmware_tools_installed? ? <<-"VMWARE" : ""

        vmware-tools is installed and may conflict with ntpd's time management.
        Please ensure both are not managing the clock, disabling one if needed.
        See http://kb.vmware.com/kb/1189 for disabling vmware-tools time sync.
      VMWARE
      errors = <<-"TIMEOUT"
        Node mcollective log indicates time is too far out of sync with broker:
        #{out.split("\n").first}
        This typically causes the node to ignore broker queries and commands.\n#{errors}
        Please ensure that all node and broker hosts are synced to the same time source.
        Restart mcollective if this script is reporting an old log message.
      TIMEOUT
      do_fail errors
    end
  ensure
    # clean up temp file, whichever way the method is left
    tmp.close! if tmp
  end

  # Checks the PAM configuration needed for gear access on a node.
  #
  # Fails if /etc/pam.d/sshd still has an uncommented reference to
  # pam_selinux, and fails for each of the runuser, runuser-l, sshd, su and
  # system-auth-ac PAM files that lacks an uncommented
  # "pam_namespace.so no_unmount_on_close" entry. Skipped on non-node hosts.
  def test_pam_openshift
    skip_test unless @is_node
    # [^#]* keeps commented-out entries from counting; POSIX classes are used
    # because "\s" inside backticks is a Ruby escape for a single space
    do_fail <<-"SSHD" unless `grep '^[^#]*pam_selinux' /etc/pam.d/sshd`.empty?
      References to pam_selinux in /etc/pam.d/sshd should all be changed
      to pam_openshift. User access to gears via ssh or git is likely to
      fail; please refer to the Deployment Guide for proper settings.
    SSHD
    %w[runuser runuser-l sshd su system-auth-ac].each do |file|
      # the output must be tested with empty?: a String is always truthy
      do_fail <<-"PAM" if `grep '^[^#]*pam_namespace\\.so[[:space:]]*no_unmount_on_close' /etc/pam.d/#{file}`.empty?
        /etc/pam.d/#{file} should end with:
           session required pam_namespace.so no_unmount_on_close
        Without this, OpenShift gears as well as other processes
        may see very mysterious failures. Please refer to the
        Deployment Guide for proper settings.
      PAM
    end
  end

  # Tells whether the named service is enabled.
  #
  # svc - service name, without a ".service" suffix
  #
  # Runs "systemctl is-enabled" when @use_systemctl is set and chkconfig
  # otherwise; returns true only if that command exits with status 0.
  def service_enabled?(svc)
    query = @use_systemctl ? "systemctl is-enabled #{svc}.service" : "chkconfig #{svc}"
    system "#{query} >& /dev/null"
    $?.exitstatus == 0
  end
  # Tells whether the named service reports a good status right now.
  #
  # svc - service name, without a ".service" suffix
  #
  # Runs "systemctl status" when @use_systemctl is set and "service ... status"
  # otherwise; returns true only if that command exits with status 0.
  def service_started?(svc)
    query = @use_systemctl ? "systemctl status #{svc}.service" : "service #{svc} status"
    system "#{query} >& /dev/null"
    $?.exitstatus == 0
  end
  # Checks that the services OpenShift relies on are running now and enabled
  # at boot.
  #
  # Builds two lists from the detected host roles: services whose absence is a
  # failure and services whose absence is only a warning. Each list is checked
  # with service_started? and service_enabled?. The "network" service is
  # additionally checked for running state with the "service" command.
  # Skipped when the host is neither broker nor node.
  def test_services_enabled
    fail_without = []
    warn_without = []
    if @is_broker
      fail_without += %w{httpd sshd}
      fail_without += @project_is[:online] ? %w{rhc-broker rhc-site} : %w{openshift-broker}
      warn_without += %w{ntpd}
    end
    if @is_node
      fail_without += %w{httpd sshd openshift-iptables-port-proxy openshift-gears
                        cgconfig cgred crond openshift-node-web-proxy}
      # interpolation tolerates an unset prefix
      fail_without << "#{@scl_prefix}mcollective"

      warn_without += %w{ntpd oddjobd messagebus openshift-watchman}
    end
    skip_test if fail_without.empty? && warn_without.empty?
    # a host that is both broker and node lists some services twice
    fail_without.uniq!
    warn_without.uniq!
       #
    verbose "checking that required services are running now"
    missing = fail_without.reject {|svc| service_started? svc}
    # "network" had to be special; always check it with "service" not "systemctl"
    # (appended with <<, since Array + String raises TypeError)
    missing << "network" unless system "service network status >& /dev/null"
    do_fail <<-REQ unless missing.empty?
      The following service(s) are not currently started:
        #{missing.join ", "}
      These services are required for OpenShift functionality.
    REQ
       #
    missing = warn_without.reject {|svc| service_started? svc}
    do_warn <<-WANT unless missing.empty?
      The following service(s) are not currently started:
        #{missing.join ", "}
      Unless you know they are not needed, please start these services.
    WANT
       #
    verbose "checking that required services are enabled at boot"
    missing = fail_without.reject {|svc| service_enabled? svc}
    do_fail <<-REQ unless missing.empty?
      The following service(s) are not started at boot time:
        #{missing.join ", "}
      These services are required for OpenShift functionality.
      Please ensure that they start at boot.
    REQ
       #
    missing = warn_without.reject {|svc| service_enabled? svc}
    do_warn <<-WANT unless missing.empty?
      The following service(s) are not started at boot time:
        #{missing.join ", "}
      Unless you know they are not needed, please ensure that they start at boot.
    WANT
  end

  # Fails on a node when /etc/sysconfig/iptables does not exist.
  # Skipped on non-node hosts.
  def test_missing_iptables_config
    skip_test unless @is_node
    iptables_conf = '/etc/sysconfig/iptables'
    # File.exist? rather than the deprecated File.exists? alias
    return if File.exist? iptables_conf

    do_fail <<-BUG
      #{iptables_conf} is missing.  Rebooting this system will likely lead to a
      broken firewall.  Ensure that the openshift-iptables-port-proxy service
      is running and all critical services are currently allowed by the running
      firewall then run 'service iptables save'.
    BUG
  end

  # Warns on a node when /etc/sysconfig/system-config-firewall contains the
  # word "enabled", listing the custom rules lokkit needs for OpenShift.
  # Skipped on non-node hosts.
  def test_system_config_firewall
    skip_test unless @is_node
    conf_file = '/etc/sysconfig/system-config-firewall'

    # Read the file directly: running grep on a file that does not exist
    # would print grep's error message to stderr.
    return unless File.readable? conf_file
    return unless File.read(conf_file).include? "enabled"

    do_warn <<-WARN
         Using system-config-firewall and lokkit with OpenShift is not recommended.
         To continue using lokkit please ensure the following custom rules are 
         installed in #{conf_file}:

         --custom-rules=ipv4:filter:/etc/openshift/system-config-firewall-compat
         --custom-rules=ipv4:filter:/etc/openshift/iptables.filter.rules
         --custom-rules=ipv4:nat:/etc/openshift/iptables.nat.rules
    WARN
  end

  # Fails on a RHEL 6 node that is exposed to the gear quota initialization
  # bug (https://bugzilla.redhat.com/show_bug.cgi?id=880369).
  #
  # The bug is only reported when /var/lib/openshift is its own file system,
  # no aquota.user file exists there, and selinux-policy is older than
  # 3.7.19-155.el6_3.14. Skipped unless the host is a node on RHEL 6.
  def test_node_quota_bug
    skip_test unless @is_node && @os_is[:rhel6]
    verbose "testing for quota creation failure bug"
    # if the gear home doesn't have its own file system, bug doesn't apply
    return if `df -P /var/lib/openshift | grep /var/lib/openshift`.empty?
    # if the quota file was created, we're probably set (assuming correct SELinux context)
    return if File.exist? '/var/lib/openshift/aquota.user'
    # if selinux policy is at least selinux-policy-3.7.19-155.el6_3.14 it's fixed
    do_fail <<-BUG unless is_rpm_minimum_version?(%w[selinux-policy 3.7.19 155.el6_3.14])
      There is a bug for initializing gear quotas when /var/lib/openshift has
      its own partition. See https://bugzilla.redhat.com/show_bug.cgi?id=880369#c29
      for a workaround (and the rest of the bug for details). There is a fix in
      selinux-policy-3.7.19-155.el6_3.14
    BUG
  end

  # Looks through the port 443 virtual hosts reported by "httpd -S" for
  # definitions that interfere with OpenShift.
  #
  # Warns about the vhost from ssl.conf (with a stronger message when it is
  # the first vhost seen for its server name), and warns when the node and
  # broker proxy vhosts have different server names. Skipped unless the host
  # is a node or a broker.
  def test_vhost_servernames
    skip_test unless @is_node || @is_broker
    verbose "checking for vhost interference problems"
    file_for_name = {}
    name_for_file = {}
    # e.g "*:443                     localhost     (/etc/httpd/conf.d/ssl.conf:25)"
    # or  "port 443 namevhost        example.com   (/etc/httpd/conf.d/ssl.conf:74)"
    vhost_line = %r!     443\s+(?:namevhost\s+)? ([\w.-]+) \s+ \(.+/([^/]+)\.conf:   !x
    `httpd -S 2> /dev/null`.split("\n").each do |line|
      found = vhost_line.match(line)
      next unless found
      server_name, conf_name = found[1], found[2]
      # the first file seen for a name wins; the last name seen for a file wins
      file_for_name[server_name] ||= conf_name
      name_for_file[conf_name] = server_name
    end
    ssl_name = name_for_file["ssl"]
    if file_for_name[ssl_name] == "ssl"
      # ssl.conf has the first/only vhost with its servername
      do_warn <<-CONFLICTS
        The VirtualHost defined in /etc/httpd/conf.d/ssl.conf has the ServerName
        #{name_for_file["ssl"]} and will respond with a 404 to all requests at
          https://#{name_for_file["ssl"]}/
        Please remove it by running this command:
          sed -i '/VirtualHost/,/VirtualHost/ d' /etc/httpd/conf.d/ssl.conf
      CONFLICTS
    elsif ssl_name
      # well, we just don't want the ssl.conf vhost anyway.
      do_warn <<-REMOVE
        The VirtualHost defined by default in /etc/httpd/conf.d/ssl.conf is not needed
        and can cause spurious warnings. Please remove it by running this command:

          sed -i '/VirtualHost/,/VirtualHost/ d' /etc/httpd/conf.d/ssl.conf
      REMOVE
    end
    bname = name_for_file['000002_openshift_origin_broker_proxy']
    nname = name_for_file['000001_openshift_origin_node']
    return unless bname && nname && bname != nname
    # broker and node both installed, node may be stealing traffic
    do_warn <<-CONFLICT
        /etc/httpd/conf.d/000001_openshift_origin_node_servername.conf defines a 
        ServerName #{nname} which may cause the node intercept requests by that name
        intended for the broker. Please reconfigure with the same ServerName as the 
        one in /etc/httpd/conf.d/000002_openshift_origin_broker_servername.conf
    CONFLICT
  end

  # Warns about leftover .rpmsave / .rpmnew files, which RPM writes next to
  # package-owned configuration files that were altered locally.
  #
  # The search uses locate after refreshing its database with updatedb; when
  # the mlocate package is not installed a warning says so and nothing is
  # searched.
  def test_altered_package_owned_configs
    unless system "rpm -q mlocate > /dev/null 2>&1"
      do_warn <<-WARN
          The mlocate package is not installed. mlocate is not a required runtime package; however,
          you may install mlocate to enable further diagnostics checking.
      WARN
      return
    end

    `updatedb`
    # the patterns are single-quoted for the shell so that locate receives the
    # backslash and the dot is matched literally
    out = `locate --regex '\\.rpmsave$' '\\.rpmnew$'`

    do_warn <<-"WARN" unless out.empty?
           RPM package owned configuration files have been altered:
             #{out}
           Ensure any package-owned configuration files which have been
           altered are accurate. This may require a manual merge of
           your previous alterations. Once you are comfortable with the merge,
           remove the reported .rpm* configuration file (or you will continue
           to see this warning each time you run the diagnostic test).
    WARN
  end

  # Fails when the installed httpd RPM is one of the version-release
  # combinations known to cause problems. Skipped when the httpd RPM is not in
  # the loaded RPM list.
  def test_broken_httpd_version
    httpd = @rpms['httpd']
    skip_test unless httpd
    version = [httpd[:version], httpd[:release]].join("-")
    broken = %w{ 2.2.22-14.ep6.el6 2.2.17-15.4.ep5.el6 }
    broken.include?(version) && do_fail(<<-BORKED)
      httpd-#{version} is installed. This version includes serious known issues that
      impact OpenShift operations.  Please upgrade or downgrade httpd accordingly.
      For details see: https://access.redhat.com/knowledge/articles/311023
      BORKED
  end

  # Fails on a node when /etc/login.defs has no active
  # "USERGROUPS_ENAB yes" setting. Skipped on non-node hosts.
  def test_usergroups_enabled
    skip_test unless @is_node
    #Check if login.defs has usergroups enabled
    # The pattern is anchored so that a commented-out line does not count, and
    # any run of blanks or tabs is accepted between the key and its value.
    ugs = `grep -iE '^[[:space:]]*USERGROUPS_ENAB[[:space:]]+yes' /etc/login.defs`
    return unless ugs.empty?
    do_fail <<-UGS
        The USERGROUPS_ENAB setting is either missing or set to no in /etc/login.defs.
        This will prevent creating user groups for new gears that are added to the system.
        Fix this by editing /etc/login.defs to add/fix the line 'USERGROUPS_ENAB yes'
    UGS
  end

  # Fails on a node when the running mcollectived process does not have the
  # expected SELinux role, type and level. Skipped on non-node hosts.
  def test_mcollective_context
    skip_test unless @is_node
    # Check if the context for running mcollective process is correct -
    # it should be {unconfined_u,system_u}:system_r:openshift_initrc_t:s0-s0:c0.c1023
    # One bug would express when commands were run through oo-spawn/shellExec.
    # Under certain circumstances they would not have the dominating MCS label
    # (s0-s0:c0.c1023) and not be able to affect gears.
    scl_root = @scl_prefix.empty? ? "" : "/opt/rh/ruby193/root"
    label = `ps -o label= $(pgrep -f "ruby[^[:space:]]*[[:space:]]#{scl_root}/usr/sbin/mcollectived([[:space:]]|$)")`
    # the leading SELinux user field may legitimately vary, so it is dropped
    context = label.chomp.split(":").drop(1).join(":")
    expected = "system_r:openshift_initrc_t:s0-s0:c0.c1023"
    return if context == expected
    do_fail <<-CONTEXT
      Mcollectived is not running in the expected SELinux context, which
      may result in node execution failures. Please check that the correct
      context is set on #{scl_root}/usr/sbin/mcollectived and that the correct SELinux
      policies are loaded.
        Expected: #{expected}
        Found: #{context}
      CONTEXT
    # TODO: which policies are those?
  end

  # Fails on a broker when the max_active_gears fact reported through
  # "oo-mco facts" contains NaN. Skipped on non-broker hosts.
  def test_mcollective_bad_facts
    skip_test unless @is_broker
    #Grep for a NaN of max active gears from mcollective
    nan_lines = `oo-mco facts max_active_gears | grep -i NaN`
    #Any output at all means there is a problem
    return if nan_lines.empty?
    do_fail <<-NAN
      The max_active_gears fact is NaN (Not a Number) for at least one
      node host. This will prevent proper gear distribution by the broker
      and could result in node hosts being overloaded. Please check for
      problems on these node hosts, such as AVC denials in /var/log/audit/audit.log
      or misconfigurations in /etc/openshift/resource_limits.conf
      NAN
  end

  # Checks the httpd authentication configuration files of the broker and,
  # when /var/www/openshift/console exists, of the console.
  #
  # Files ending in "-dev.conf" are ignored. Warns when either application has
  # more than one *auth*.conf file, and warns when each has exactly one but
  # the two file names differ. Skipped on non-broker hosts.
  def test_auth_conf_files
    skip_test unless @is_broker

    console = []
    if File.exist? '/var/www/openshift/console'
      console = `ls /var/www/openshift/console/httpd/conf.d/*auth*.conf 2> /dev/null | grep -v -- '-dev.conf$'`.split
      #Make sure there is only 1 conf file for the console
      do_warn <<-CONSOLE if console.size > 1
          There is more than one authentication configuration file for the
          console in /var/www/openshift/console/httpd/conf.d
          Authentication configuration files:
          #{console.join "\n\t"}
      CONSOLE
    end

    broker = `ls /var/www/openshift/broker/httpd/conf.d/*auth*.conf 2> /dev/null | grep -v -- '-dev.conf$'`.split
    #Make sure there is only 1 conf file for the broker
    do_warn <<-BROKER if broker.size > 1
        There is more than one authentication configuration file for the
        broker in /var/www/openshift/broker/httpd/conf.d
        Authentication configuration files:
        #{broker.join "\n\t"}
    BROKER

    # The names can only be compared when each side has exactly one file; this
    # also covers a broker or console with no auth configuration at all.
    return unless console.size == 1 && broker.size == 1

    #If conf files don't match, then it is possible they are using two different auth types.
    if File.basename(console.first) != File.basename(broker.first)
      do_warn <<-MISMATCH
        The two authentication configuration file names set up for the console and broker do not match;
        please ensure that they are using the same authentication mechanism.
        Authentication configuration files:
        Broker: #{broker.first}
        Console: #{console.first}
      MISMATCH
    end

  end

  # Compares the SSL certificate served at https://localhost/ with the
  # ServerName of each port 443 virtual host reported by "httpd -S".
  #
  # Warns when the certificate is self-signed (subject equals issuer) and, for
  # every configuration file defining a ServerName that matches neither the
  # certificate's common name nor its wildcard form, warns about the mismatch.
  # Warns and stops early when the certificate details cannot be read from
  # curl's output. Skipped on non-broker hosts.
  def test_broker_certificate
    skip_test unless @is_broker
    require 'socket'

    # Retrieve the SSL cert from Apache
    response = `curl -k -s -v https://localhost/ -o /dev/null 2>&1`
    # each of these is nil when curl printed no such line
    issuer = response[/^\*\s*issuer: (.*)$/, 1]
    subject = response[/^\*\s*subject: (.*)$/, 1]
    commonname = response[/^\*\s*common name: (.*)$/, 1]
    # fall back to the CN= field of the subject when there is no "common name:" line
    commonname ||= subject && subject[/CN=([^,\/]+)/, 1]
    unless issuer && subject && commonname
      do_warn <<-NOCERT
        Could not read the broker SSL certificate details from the output of
          curl -k -s -v https://localhost/
        so the certificate could not be compared with the httpd ServerName.
        Make sure the broker httpd is running and answering on port 443.
      NOCERT
      return
    end
    if subject == issuer
      do_warn "Using a self-signed certificate for the broker"
    end

    # nil when httpd reports no *:443 section
    apacheconfig = `httpd -S 2> /dev/null`.slice(  / ^\*:443.*  (\n^\s.*)*  \n(\S|\z) /x  )
    servername = apacheconfig ? apacheconfig.scan(/(?:(?:default server )|(?:port 443 namevhost ))(\S+) \((?:[^:]+)/) : []
    # does the certificate common name match ServerName, exactly or as a wildcard
    badnames = servername.map {|sn| sn[0]}.reject do |name|
      commonname == name || commonname == name.gsub(/^[^.]*/,"*")
    end

    badnames.uniq.each do |badname|
      config_files = `grep -l -e "^[[:blank:]]*ServerName[[:blank:]]*#{badname}" /etc/httpd/conf.d/*.conf`
      config_files.each_line do |config_file|
        do_warn <<-CONFLICT
            #{config_file.chomp} 
            defines ServerName as #{badname}.  This does not match the certificate common name of 
            #{commonname}.  
            This can cause errors when client tools try to connect to the broker.
        CONFLICT
      end
    end
  end

  def test_abrt_addon_python
    skip_test unless @is_node
    if @os_is[:rhel64]
      verbose "Checking for v2 python cart/abrt incompatibility (BZ907449)"
      rogue_rpms = []
      if @rpms.has_key?('abrt-addon-python') and @rpms.has_key?('openshift-origin-cartridge-python')
        rogue_rpms << "abrt-addon-python has a known conflict with openshift-origin-cartridge-python (https://bugzilla.redhat.com/show_bug.cgi?id=907449)"
      end
      rogue_rpms.empty? or do_fail <<-ROGUES
        The following problems were found with your RPMs: \n\t#{ rogue_rpms.join("\n\t") }
      ROGUES
    end
  end

  # Fails on a node when both the apache-mod-rewrite and apache-vhost
  # frontend plug-in RPMs are installed, and says which one to remove based on
  # the OPENSHIFT_FRONTEND_HTTP_PLUGINS setting. Skipped on non-node hosts.
  def test_node_frontend_clash
    skip_test unless @is_node
    conflicts = %w[ rubygem-openshift-origin-frontend-apache-mod-rewrite
                    rubygem-openshift-origin-frontend-apache-vhost ]
    return unless conflicts.all? {|rpm| @rpms[rpm]}
    # these both provide a conf file in /etc/httpd/conf.d which conflict if both are installed.
    config = OpenShift::Config.new
    # the setting may be absent, and entries may carry blanks around the commas
    configured = config.get('OPENSHIFT_FRONTEND_HTTP_PLUGINS') || ""
    plugins = configured.split(',').map {|it| "rubygem-" + it.strip}
    remove = conflicts - plugins
    # when the setting names neither or both, we cannot tell which one to drop
    remove = ["<the RPM of the plug-in not used in node.conf>"] unless remove.size == 1
    do_fail <<-CONFLICT
        Conflicting RPMs are installed:
          rubygem-openshift-origin-frontend-apache-mod-rewrite
          rubygem-openshift-origin-frontend-apache-vhost
        When both are present, apps are likely to be unreachable.
        Please remove the one not used in node.conf:
          yum remove #{remove.join " "}
        Then, restart the httpd service.
    CONFLICT
  end

  # Runs oo-admin-yum-validator, when it is available, and warns with its
  # output if it exits unsuccessfully. On an enterprise install without the
  # tool, warns that it should be installed.
  def test_yum_configuration
    validator = "oo-admin-yum-validator"
    if executable? validator
      output = `oo-admin-yum-validator --report-all 2>&1`
      $?.success? || do_warn(<<-YUM)
        oo-admin-yum-validator reported some possible problems
        with your package source configuration:
--------------------------------------------------------------
      #{output}
--------------------------------------------------------------
        Incorrect package source configuration could lead to
        failure to install the correct RPMs.
      YUM
    elsif @project_is[:enterprise]
      do_warn(<<-YUM)
        oo-admin-yum-validator is not installed. Please install with:
          yum install openshift-enterprise-release
        This tool helps validate and fix your package source
        configuration. Incorrect configuration could lead to
        failure to install the correct RPMs.
      YUM
    end
  end

  # On a node, compares the BROKER_HOST and CLOUD_DOMAIN settings of the node
  # configuration with the contents of the matching
  # /etc/openshift/env/OPENSHIFT_* files and warns about every difference.
  # A missing or unreadable env file is warned about rather than raising.
  # Skipped on non-node hosts.
  def test_node_env_vars_match
    skip_test unless @is_node
    config = OpenShift::Config.new
    %w[ BROKER_HOST CLOUD_DOMAIN ].each do |var|
      env_file = "/etc/openshift/env/OPENSHIFT_#{var}"
      unless File.readable? env_file
        # File.read would raise here and abort the test with a stack trace
        do_warn <<-"MISSING"
          #{env_file} is missing or unreadable.
          It should contain the same value as #{var} in /etc/openshift/node.conf,
          which specifies '#{config.get var}'
        MISSING
        next
      end
      contents = File.read(env_file).chomp
      if contents != config.get(var)
        do_warn <<-"MISMATCH"
          /etc/openshift/env/OPENSHIFT_#{var} contains '#{contents}'
          /etc/openshift/node.conf:#{var} specifies '#{config.get var}'
          These should match; an incorrect value in either case could
          cause problems. node.conf values are used in defining application
          DNS records and proxy routing, while env var files are used for
          contacting the broker for application management actions.
        MISMATCH
      end
    end
  end

  # Warns about configuration files that the apache user apparently cannot
  # read.
  #
  # On a broker, selected file names under the broker/console configuration
  # directories are examined (except three /etc/openshift files that are
  # deliberately exempt), plus everything in /etc/httpd/conf.d. On a node,
  # everything in /etc/httpd/conf.d and /var/lib/openshift/.httpd.d is examined.
  def test_apache_can_read_conf_files
    # This is not intended to have perfect accuracy. It is intended to catch the common
    # problem where root umask creates conf files unreadable by apache during deployment.
    #
    # Find files not owned by apache, not readable by apache group, and not world-readable
    # Note: we will leave selinux labels for a different test
    query = '-type f ! \( -user apache -o \( -group apache -perm -g=r \) -o -perm -o=r \)'
    broker_unreadable = []
    node_unreadable = []
    if @is_broker
      name_tests = %w[*.conf *.pem htpasswd quickstarts.json].map {|n| "-name '#{n}'" }.join(' -o ')
      names = '\( ' + name_tests + ' \)'
      dirs = "/etc/openshift /etc/httpd/conf.d /var/www/openshift/{broker,console}/httpd/"
      broker_ok = %w[express node resource_limits].map {|f| "/etc/openshift/#{f}.conf" }
      by_name = `find #{dirs} #{names} #{query} 2> /dev/null`.split(/\n/)
      in_conf_d = `find /etc/httpd/conf.d #{query}`.split(/\n/)
      broker_unreadable = (by_name - broker_ok) + in_conf_d
    end
    if @is_node
      node_unreadable = `find /etc/httpd/conf.d /var/lib/openshift/.httpd.d  #{query}`.split(/\n/)
    end
    unreadable = (broker_unreadable + node_unreadable).uniq
    return if unreadable.empty?
    do_warn <<-"UNREADABLE"
        The following configuration files have names and locations indicating
        that the apache user should be able to read them, but are not readable
        by the apache user:
          
          #{unreadable * "\n          "}
          
        #{ broker_unreadable.empty? ? "" : "The broker and console services may malfunction without read access to these files." }
        #{ node_unreadable.empty? ? "" : "The host httpd server may malfunction without read access to these files."}
    UNREADABLE
  end

end #class OODiag


############ EXECUTION ##########
#
# If this script is running directly, just go ahead and run tests.
# In a different context (e.g. irb) just load and don't run anything.

if __FILE__ == $0

  #
  # Options parsing...
  #
  require 'optparse'
  options = {
    :wait => 2,
    :verbose => false,
  }
  optparse = OptionParser.new { |opts|
    opts.banner = <<-"USAGE"
      #{$0}: Detect common problems on OpenShift systems

      Usage: #{$0} [switches] [test methods to run]
      Example: #{$0}
      Example: #{$0} -v -w 1 test_broker_accept_scripts

      Switches:
    USAGE

    opts.on('-v','--verbose', 'Print verbose statements') { |verbose| options[:verbose] = verbose }
    opts.on('-w','--wait seconds', Float,
            'Seconds for broker to wait for node responses (default 2)') { |wait| options[:wait] = wait }
    opts.on('-o','--abortok', 'Continue tests even when an abort is thrown by a test') { |abortok| options[:abortok] = abortok }
    opts.on('-h','--help', 'Print usage') { puts opts; exit 0 }
  }

  begin
    optparse.parse!
  rescue OptionParser::ParseError => e
    # ParseError is the parent of InvalidArgument, InvalidOption and
    # MissingArgument, so every command line mistake gets the usage text
    # instead of a stack trace
    puts "\n ##### #{e.message} #####"
    puts optparse.to_s
    puts "\n ##### #{e.message} #####"
    puts
    exit 1
  end
  # whatever is left on the command line names the tests to run
  options[:tests] = ARGV

  begin
    #
    # execute
    #
    o = OODiag.new(options)
    o.run_setup
    warns, errors = o.run_tests

    #
    # summarize
    #
    o.wputs "#{warns} WARNINGS" if warns > 0
    if errors > 0
      o.eputs "#{errors} ERRORS"
    else
      puts "NO ERRORS"
    end
    # exit statuses wrap modulo 256; cap the count so that e.g. 256 errors
    # cannot be reported as success
    exit [errors, 255].min
  rescue Interrupt
    puts "\nExiting due to user interrupt (^C)."
    exit 1
  end
end

