#!/usr/bin/env oo-ruby
#
# This can be used as a script or library on an OpenShift broker host.
# As a script, the default output is for command-line viewing, or choose
# --format tsv for something you can analyze in your favorite spreadsheet,
# or json/xml/yaml to process all the data in your tool of choice.
# Run with the -h flag to view options.
#
# To use as a library, do the following in irb or your ruby script:
# load 'oo-stats'
# stats = OOStats.new
# stats.gather_statistics
#
# ... now stats.results gives you a hash with all the data gathered.
# You can also use the set_option and set_columns methods to
# customize the output you get from stats.display_results.
#
# A brief description of the returned statistics can be found in comments
# in the stats library - current location:
# https://github.com/openshift/origin-server/blob/controller/app/models/admin/stats.rb
#
# Discrepancies between counts in MongoDB vs. nodes should be either
# 1. transient (gear create/destroy in progress), or
# 2. indicative of a problem (broken gear on node or miscount in DB).

#--
# Copyright 2013 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

require 'rubygems'

class OOStats

  # Constructor, invoked through OOStats.new.
  #
  # options - Hash of settings using the same keys as the script switches.
  #           When nil, an @options that is already set is kept, and failing
  #           that the built-in defaults (:wait => 2, :format => :text) apply.
  #
  # With :read_file set the data source is an OOStats::Reader. Otherwise the
  # broker environment is loaded (the time taken is recorded under
  # :load_broker_environment) and an Admin::Stats::Maker is used.
  # The default column lists are installed last.
  def initialize(options = nil)
    builtin_defaults = { :wait => 2, :format => :text }
    @options = options || @options || builtin_defaults
    @time = {}
    @stats =
      if @options[:read_file]
        OOStats::Reader.new(@options)
      else
        @time[:load_broker_environment] = time_msecs { load_broker_rails_env }
        Admin::Stats::Maker.new(@options)
      end
    set_initial_columns
  end

  # Change one or more options after construction.
  # The keys are essentially the same as the script options, e.g.
  #   set_option :format => :xml
  # The stats source is told first, then the new values are merged into our
  # own option hash, which is also what gets returned.
  def set_option(options)
    @stats.set_option(options)
    @options.update(options)
  end

  # Set the columns displayed for text/tsv reports.
  #
  # columns_for_table_hash - Hash of table name => Array of column keys.
  #
  # The entries are merged over any lists set earlier. The tableize/legendize
  # helpers auto-generate columns when the list they are handed is nil
  # (because it was never specified). See set_initial_columns for example
  # usage. Returns the merged hash of column lists.
  def set_columns(columns_for_table_hash)
    (@columns_for ||= {}).update(columns_for_table_hash)
  end

  # Install the column lists you get by default for the text report tables.
  # Returns whatever set_columns returns.
  def set_initial_columns
    defaults = {
      :profile_summary => [
        :district_count, :district_capacity, :dist_avail_capacity, :dist_avail_uids,
        :lowest_dist_usage_pct, :highest_dist_usage_pct, :avg_dist_usage_pct,
        :nodes_count, :nodes_active, :total_apps,
        :gears_total_count, :total_gears_in_db_records, :gear_count_db_minus_node,
        :gears_active_count, :available_active_gears, :effective_available_gears,
      ],
      :district_table => [
        :name, :nodes_count, :dist_avail_capacity, :gears_active_count,
        :effective_available_gears, :lowest_active_usage_pct, :avg_active_usage_pct,
      ],
      :district_summary => [
        :profile, :district_capacity, :dist_avail_capacity, :dist_avail_uids,
        :nodes_count, :nodes_active, :gears_total_count,
        :gears_active_count, :available_active_gears, :effective_available_gears,
        :lowest_active_usage_pct, :highest_active_usage_pct, :avg_active_usage_pct,
      ],
      :node_table => [
        :name, :gears_total_count, :gears_usage_pct, :gears_idle_count,
        :gears_active_count, :max_active_gears, :gears_active_usage_pct,
      ],
    }
    set_columns defaults
  end

  # Time an operation in milliseconds.
  # Runs the given block and returns the whole number of milliseconds between
  # the clock readings (Time.now) taken before and after it.
  def time_msecs
    clock_ms = lambda { (Time.now.to_f * 1000).to_i }
    began_at = clock_ms.call
    yield
    clock_ms.call - began_at
  end

  # Gather all statistics and analyze.
  # The hash returned by the stats source's gather_statistics is merged into
  # our timings; the (updated) timings hash is returned.
  def gather_statistics
    @time.merge!(@stats.gather_statistics)
  end

  # Bundle up the statistics results in a hash: a copy of the stats source's
  # results with our timings added under :timings_msecs.
  def results
    timings = { :timings_msecs => @time }
    @stats.results.merge(timings)
  end

  # Print results to stdout according to current options.
  #
  # @options[:format] picks the output: :yaml, :xml, :json, :tsv, and anything
  # else means the text report. YAML and XML print the results as returned
  # when @options[:keep_classes] is set and the class-stripped copy otherwise;
  # JSON always prints the class-stripped copy.
  # Always returns nil.
  def display_results
    data = results
    plain = Admin::Stats::HashWithReaders.deep_clear_subclasses(data)
    structured = @options[:keep_classes] ? data : plain
    case @options[:format]
    when :yaml
      require 'yaml'
      puts structured.to_yaml
    when :xml
      # to_xml is not defined here; rails supplies this one
      puts structured.to_xml
    when :json
      require 'json'
      puts plain.to_json
    when :tsv
      display_results_tsv data
    else # :text
      display_results_text data
    end
    nil
  end

  # Load the broker rails environment so we can leverage its tools.
  #
  # The environment file is looked up under $OPENSHIFT_BROKER_DIR, falling
  # back to /var/www/openshift/broker. On success analytics are switched off
  # and @db holds what OpenShift::DataStore.db returned.
  #
  # If loading fails, the error is printed to stdout and the process exits
  # with status 1. Only real load failures are treated that way:
  # StandardError, plus ScriptError so that the LoadError/SyntaxError raised
  # by require are covered. Interrupt (Ctrl-C), other signals and explicit
  # exit requests are not rescued, so they are no longer misreported as a
  # broker failure.
  def load_broker_rails_env
    begin
      require "#{ENV['OPENSHIFT_BROKER_DIR'] || '/var/www/openshift/broker'}/config/environment"
      # Disable analytics for admin scripts
      Rails.configuration.analytics[:enabled] = false
      # Wait this long for answers from node hosts
      #Rails.configuration.msg_broker[:rpc_options][:disctimeout] = @options[:wait]
      #Admin::Stats handles this directly
      # Get a read-only DB connection (to a secondary if possible)
      @db = OpenShift::DataStore.db
    rescue StandardError, ScriptError => e
      puts <<-"FAIL"
        Broker application failed to load; aborting.
        The error was: #{e.message}
      FAIL
      exit 1
    end
  end

  # Print str on stdout framed above and below by a row of dashes of the
  # same length.
  def outline(str)
    rule = '-' * str.length
    puts rule, str, rule
  end

  # Print the human-readable (text format) report to stdout.
  #
  # res - results hash to render; defaults to the current results.
  #
  # With @options[:db_stats] set, the per-user table and the apps/gears
  # distributions come first (each only if present in res). Unless
  # @options[:only] is set, this is followed by the cartridge usage, a
  # breakdown at the detail chosen by @options[:level] (:profile, :district,
  # anything else is handled as :node) and the summary for all systems.
  def display_results_text(res=results)

    # first, display the per-user usage if available
    if @options[:db_stats]
      per_user = res[:db_count_per_user]
      if per_user
        outline "Usage of apps and gears by user:"
        # columns are the keys (:small_apps :small_gears etc) with :login at front
        text_tableize per_user, @columns_for[:user_table]
        puts "\n\n"
      end

      [ [:users_with_num_apps,  "Distribution of apps usage (app count : users) :"],
        [:users_with_num_gears, "Distribution of gears usage (gear count : users) :"],
      ].each do |key, title|
        distribution = res[:count_all][key]
        next unless distribution
        outline title
        text_legendize distribution
        puts "\n\n"
      end
    end

    # everything below is left out when only the per-user data was asked for
    return if @options[:only]

    # display system-wide cartridge usage
    cartridge_counts = res[:count_all][:cartridges]
    if cartridge_counts
      outline "System-wide usage of cartridges:"
      cartridge_counts.sort_by { |cart, _count| cart }.each do |cart, count|
        puts "  #{cart} : #{count}"
      end
      puts "\n\n"
    end

    # display systems info
    case @options[:level]
    when :profile
      res[:profile_summaries].sort_by { |summary| summary[:profile] }.each do |summary|
        outline "Profile '#{summary[:profile]}' summary:"
        text_legendize summary, @columns_for[:profile_summary]
        puts "\nDistricts:"
        tightest_first = summary[:districts].sort_by { |district| district[:dist_avail_capacity] }
        text_tableize tightest_first, @columns_for[:district_table]
        unless summary[:missing_nodes].empty?
          puts "\nWARNING: the following districted node(s) in this profile DID NOT respond:"
          puts summary[:missing_nodes].join(", ")
        end
        puts "\n\n"
      end
    when :district
      res[:district_summaries].sort_by { |district| district[:profile] + district[:name] }.each do |district|
        outline "District '#{district[:name]}' summary:"
        text_legendize district, @columns_for[:district_summary]
        puts "\nNodes:"
        nodes_by_name = district[:nodes].sort_by { |node| node[:name] }
        text_tableize nodes_by_name, @columns_for[:node_table]
        unless district[:missing_nodes].empty?
          puts "\nWARNING: the following node(s) in this district DID NOT respond:"
          puts district[:missing_nodes].join(", ")
        end
        puts "\n\n"
      end
    else # :node
      silent_nodes = []
      res[:district_summaries].sort_by { |district| district[:profile] + district[:name] }.each do |district|
        silent_nodes += district[:missing_nodes]
        puts "Nodes for district '#{district[:name]}' with profile '#{district[:profile]}':"
        nodes_by_name = district[:nodes].sort_by { |node| node[:name] }
        text_tableize nodes_by_name, @columns_for[:node_table]
        puts "\n\n"
      end
      unless silent_nodes.empty?
        puts "\nWARNING: the following districted node(s) DID NOT respond:"
        puts silent_nodes.join(", ")
        puts
      end
    end

    outline "Summary for all systems:"
    text_legendize res[:count_all], @columns_for[:count_all_legend]
    puts "\n\n"
  end


  # Display tab-separated values (spreadsheet friendly) on stdout.
  #
  # res - results hash to render; defaults to the current results.
  #
  # None of the :tsv_* column lists looked up below is set by default, so the
  # columns are generated on the fly; use set_columns to define an ordering
  # for any of them if desired.
  def display_results_tsv(res=results)
    puts "Resource usage summary:"
    tsv_legendize res[:count_all], @columns_for[:tsv_db_count_all]

    silent_nodes = res[:profile_summaries].inject([]) { |found, summary| found + summary[:missing_nodes] }
    unless silent_nodes.empty?
      puts "\n\nWARNING: these districted nodes DID NOT respond:"
      silent_nodes.each { |node| puts node }
    end

    # one table per level: title, results key, column list key, sort key
    [ ["\n\nPer-gear-profile usage summary:", :profile_summaries, :tsv_profile_summaries,
        proc { |h| h[:profile] }],
      ["\n\nPer-district usage summary:", :district_summaries, :tsv_district_summaries,
        proc { |h| "#{h[:profile]}-#{h[:name]}" }],
      ["\n\nPer-node usage summary:", :node_entries, :tsv_node_entries,
        proc { |h| "#{h[:node_profile]}-#{h[:name]}" }],
    ].each do |title, results_key, columns_key, sort_key|
      puts title
      tsv_tableize res[results_key].sort_by(&sort_key), @columns_for[columns_key]
    end

    per_user = res[:db_count_per_user]
    if per_user # --db option specified
      puts "\n\nPer-user usage summary:"
      tsv_tableize per_user, @columns_for[:tsv_db_count_for_user]
    end

    [ [:users_with_num_apps,  "\n\nDistribution of apps usage (app count : users) :"],
      [:users_with_num_gears, "\n\nDistribution of gears usage (gear count : users) :"],
    ].each do |key, title|
      distribution = res[:count_all][key]
      next unless distribution
      puts title
      tsv_legendize distribution
    end

    cartridge_counts = res[:count_all][:cartridges]
    if cartridge_counts # --db option specified
      puts "\n\nCartridge usage summary:"
      tsv_legendize cartridge_counts, @columns_for[:tsv_cartridge_legend]
      # also break them down by profile
      res[:profile_summaries].sort_by { |summary| summary[:profile] }.each do |summary|
        puts "\n\nCartridge usage summary for profile '#{summary[:profile]}':"
        tsv_legendize summary[:cartridges], @columns_for[:tsv_cartridge_legend]
      end
    end

    puts "\n\nTimings for gathering this information (milliseconds):"
    tsv_legendize res[:timings_msecs], @columns_for[:tsv_timings]
  end


  # Automatically build a column list from the data itself.
  #
  # hashes - Array of row hashes.
  #
  # A key becomes a column when at least one row holds a "simple" value
  # (String, Symbol or Integer) under it. Columns are ordered by their name
  # as a string - if you wanted a specific order you wouldn't be here - with
  # one exception: the identifier columns :name, :profile, :node_profile and
  # :login are brought to the front when present. Each one is moved to the
  # very front in turn, so entries later in that list end up before earlier
  # ones.
  #
  # Returns the Array of column keys.
  def auto_column_list(hashes)
    simple = {}
    hashes.each do |hash|
      hash.each do |key, value|
        # Matching on Integer covers every whole number. The Fixnum constant
        # must not be named: it no longer exists in current rubies and
        # referencing it raises NameError.
        case value
        when String, Symbol, Integer
          simple[key] = true
        end
      end
    end
    cols = simple.keys.sort_by(&:to_s)
    [:name, :profile, :node_profile, :login].each do |col|
      cols = [col] + (cols - [col]) if cols.include?(col)
    end
    cols
  end

  # Print a hash as tab-separated "label<TAB>value" lines, one per key.
  #
  # hash  - the data to print.
  # order - keys to print, in that order; auto-generated from the hash when
  #         not given.
  #
  # Returns the list of keys that was used.
  def tsv_legendize(hash, order=nil)
    keys = order || auto_column_list([hash])
    keys.each do |key|
      label = key.to_s.humanize
      puts "#{label}\t#{hash[key]}"
    end
  end

  # Print records as a tab-separated table: one header line with the
  # humanized column names, then one line per record.
  #
  # hashes - Array of row hashes.
  # cols   - column keys to print; auto-generated from the rows when not given.
  #
  # Returns hashes.
  def tsv_tableize(hashes, cols=nil)
    columns = cols || auto_column_list(hashes)
    # print columns
    headings = columns.map { |col| col.to_s.humanize }
    puts headings.join("\t")
    # print all records
    hashes.each do |record|
      cells = columns.map { |col| record[col] }
      puts cells.join("\t")
    end
  end


  # Display data in a formatted "label : value" legend, with the labels
  # right-aligned to the widest one.
  #
  # hash  - the data to print.
  # order - keys to print, in that order; auto-generated from the hash when
  #         not given.
  #
  # Keys whose value is nil are left out. Returns the list of keys used.
  def text_legendize(hash, order=nil)
    keys = order || auto_column_list([hash])
    labels = {}
    keys.each { |key| labels[key] = key.to_s.humanize }
    width = labels.values.map { |label| label.length }.max.to_s
    line_format = " %#{width}s : %s\n"
    keys.each do |key|
      value = hash[key]
      next if value.nil?
      printf line_format, labels[key], to_text(value)
    end
  end

  # Display data in a nicely formatted table: a header line, a divider of
  # dashes, then one line per row, every column right-aligned.
  #
  # rows - Array of row hashes.
  # cols - column keys to print; auto-generated from the rows when not given.
  #
  # Headings are the camelized column keys, shortened by the abbreviations
  # below. Returns rows.
  def text_tableize(rows, cols=nil)
    columns = cols || auto_column_list(rows)
    # applied in this order; a trailing Count/Total is dropped before any
    # remaining Total becomes '#'
    abbreviations = [
      [/Count$/,     ''],
      [/Total$/,     ''],
      [/Total/,      '#'],
      [/Effective/,  'Eff'],
      [/Available/,  'Avail'],
      [/Active/,     'Actv'],
      [/Usage/,      'Usg'],
      [/Capacity/,   'Cap'],
      [/Lowest/,     'Lo'],
      [/Highest/,    'Hi'],
      [/Average/,    'Avg'],
    ]
    # figure out how to display each column
    layout = {}
    columns.each do |col|
      heading = abbreviations.inject(col.to_s.camelize) do |text, (pattern, short)|
        text.gsub(pattern, short)
      end
      # wide enough for the heading and for every value in the column
      widest = (rows.map { |row| to_text(row[col]).length } << heading.length).max
      layout[col] = { :name => heading, :size => widest }
    end
    # print table header and divider
    header_cells = columns.map { |col| format("%#{layout[col][:size]}s", layout[col][:name]) }
    puts header_cells.join(' ')
    divider_cells = columns.map { |col| '-' * layout[col][:size] }
    puts divider_cells.join(' ')
    # print table contents
    rows.each do |row|
      cells = columns.map { |col| format("%#{layout[col][:size]}s", to_text(row[col])) }
      puts cells.join(' ')
    end
  end

  # Turn a value into the text shown in the reports:
  #   nil     => empty string
  #   Float   => rounded to one decimal place
  #   Integer => with a comma between each group of three digits
  #   other   => to_s
  def to_text(x)
    return "" if x.nil?
    if x.is_a?(Float)
      x.round(1).to_s
    elsif x.is_a?(Integer)
      # a comma goes after every digit that is followed by a whole number of
      # three-digit groups up to the end
      x.to_s.gsub(/(\d)(?=(\d{3})+\z)/, '\1,')
    else
      x.to_s
    end
  end

  # stats mock that reads dataset from a file instead.

  class Reader
    #
    # some ugly hackish things to avoid loading the entire broker
    #
    require 'rubygems'
    gem 'activesupport'
    require 'active_support/inflector'
    gem 'openshift-origin-controller'
    require 'admin/stats/results'

    # Set up a reader that has not read anything yet.
    #
    # options - option hash; gather_statistics reads the file named by its
    #           :read_file entry. The hash is kept as given, not copied.
    def initialize(options = {})
      @results = nil
      @options = options
    end

    # Merge new settings into the reader's options.
    # Returns the updated option hash.
    def set_option(options)
      @options.update(options)
    end

    # Read the statistics from the file named by @options[:read_file].
    #
    # File names ending in yml/yaml are parsed as YAML, names ending in
    # json/jsn/js as JSON. The parsed data is passed through
    # Admin::Stats::HashWithReaders.deep_convert_hashes (YAML data that
    # already is a HashWithReaders is kept as it is). For any other file name
    # a complaint is printed and the process exits with status 1.
    #
    # Returns the :timings_msecs entry of the data read, or {} if there is none.
    # Raises RuntimeError if no file was given.
    def gather_statistics
      case file = @options[:read_file]
      when nil
        raise "No file given"
      when /(yml|yaml)$/
        require 'yaml'
        # Saved results may contain symbols and (when written with
        # --keep-classes) Admin::Stats classes. Newer YAML libraries reject
        # those in load_file, so use the unrestricted loader where it exists.
        # NOTE(review): unrestricted loading can instantiate arbitrary
        # classes - only read files from a trusted source.
        @results = if YAML.respond_to?(:unsafe_load_file)
                     YAML.unsafe_load_file(file)
                   else
                     YAML.load_file(file)
                   end
        unless @results.is_a? Admin::Stats::HashWithReaders
          @results = Admin::Stats::HashWithReaders.deep_convert_hashes(@results)
        end
      when /(json|jsn|js)$/
        require 'json'
        # File.read never treats a name starting with "|" as a command to run
        @results = JSON.parse( File.read(file) )
        @results = Admin::Stats::HashWithReaders.deep_convert_hashes(@results)
      else
        puts "Don't know how to parse file '#{file}'"
        puts "Please change extension to .yaml"
        exit(1)
      end
      return @results[:timings_msecs] || {}
    end

    # The data read by gather_statistics; nil until a file has been read.
    def results
      return @results
    end
  end # class Reader


end #class OOStats


############ EXECUTION ##########
#
# If this script is running directly, gather the information and display.
# In a different context (e.g. irb) just load the class and don't run anything.

if __FILE__ == $0

  #
  # Options parsing...
  #
  require 'optparse'
  # defaults used for any switch that is not given
  options = {
    :wait => 2,
    :level => :profile,
    :format => :text,
    :keep_classes => false,
  }
  optparse = OptionParser.new { |opts|
    opts.banner = <<-"USAGE"
      #{$0}: Output usage statistics from this installation.

      Usage: #{$0} [switches]
      Example: #{$0}              # standard text display
      Example: #{$0} --wait 10    # for slower node responses
      Example: #{$0} --format yaml
      Example: #{$0} --read-file saved.yaml

      Switches:
    USAGE

    opts.on('-w','--wait SECONDS', Float, <<WAIT) { |wait| options[:wait] = wait }
Seconds for broker to wait for node responses (default 2).
\tIf nodes are not responding in time, increase this as needed.
WAIT
    opts.on('-d','--db', <<USER) { |x| options[:db_stats] = x }
Gather and include MongoDB usage stats.
\tThis scans all users, apps, and gears in the MongoDB. With thousands
\tof users and applications, this may take seconds or minutes and
\tput noticeable load on MongoDB.
USER
    opts.on('-f','--format FORMAT', [:json, :xml, :yaml, :tsv, :text],
            'Choose output format (json, tsv, xml, yaml, default: text)') { |x| options[:format] = x }
    opts.on('-k','--keep-classes', <<KEEP) {|x| options[:keep_classes] = x }
\t\tWhen writing YAML or XML, normally the results are written as ordinary hashes
\t\tso the data can be consumed without needing Admin::Stats loaded.
\t\tWith this option, results will keep the subclasses that Admin::Stats gives them.
KEEP
    opts.on('-l','--level LEVEL', [:profile, :district, :node], <<LEVEL) { |l| options[:level] = l }
For text format, print statistical summary at this level:
\tprofile: profile and district summary statistics (default)
\tdistrict: district summary and node statistics
\tnode: node statistics only
LEVEL
    # sets both :db_stats and :only
    opts.on('-u','--only-user', <<FORMAT) { |x| options[:db_stats] = options[:only] = x }
With text format, show ONLY per-user usage summary (implies --db)
FORMAT
    opts.on('-r','--read-file FILE', String, <<READ) { |x| options[:read_file] = x }
Don't actually get data from the system; read it from the YAML file given.
READ
    opts.on('-h','--help', 'Print usage') { puts opts; exit 0 }
  }

  begin
    optparse.parse!
  rescue OptionParser::ParseError => e
    # ParseError is the common parent of every command-line mistake
    # (unknown switch, missing argument, invalid value, ...), so each of them
    # gets the usage text instead of a stack trace.
    puts "\n ##### #{e.message} #####"
    puts optparse.to_s
    puts "\n ##### #{e.message} #####"
    puts
    exit 1
  end

  #
  # execution
  #
  o = OOStats.new(options)
  o.gather_statistics
  o.display_results

end
