#!/usr/bin/env oo-ruby

#--
# Copyright 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

require 'getoptlong'
require 'time'

# Prints the command-line help text to standard output and terminates the
# script with exit status 255. Never returns to the caller.
def usage
  help_text = <<USAGE
== Synopsis

#{$0}:  Utility to check and fix various inconsistencies in mongo data

The following issues can be fixed with this script:
  - mismatch between user's consumed gears and actual gears across all domains/applications for the user
  - mismatch between the ssh keys in mongo and the ssh keys on the gear on the node
  - ssh keys and environment variables that are left behind after their reference component/gear is removed
  - mismatch in available UIDs for a district and actual UIDs used by the gears on the nodes within the district
  - mismatch in user resource consumption in mongo
  - apps due to server node down/decommissioned

== Usage

#{$0} OPTIONS

Options:
-v|--verbose
    Print information about each check being performed
-r|--report-only
    Only report the mismatches, don't fix them
--consumed-gears
    Fix  mismatch in user's consumed gears vs actual gears in mongo
--ssh-keys
    Fix  mismatch in SSH keys between mongo and on the node for a gear
--district-uids
    Fix mismatch in available UIDs for a district in mongo
--removed-nodes
    Fix or cleanup apps due to server node down/decommissioned
--usage
    Fix resource usage mismatches in mongo
-h|--help
    Show Usage info
USAGE
  puts help_text
  exit 255
end

# Parse the command line into a hash keyed by the long option name.
# Any parsing error shows the help text (which exits the script).
args = {}
begin
  opts = GetoptLong.new(
    ["--verbose",          "-v", GetoptLong::NO_ARGUMENT],
    ["--report-only",      "-r", GetoptLong::NO_ARGUMENT],
    ["--ssh-keys",               GetoptLong::NO_ARGUMENT],
    ["--district-uids",          GetoptLong::NO_ARGUMENT],
    ["--consumed-gears",         GetoptLong::NO_ARGUMENT],
    ["--removed-nodes",          GetoptLong::NO_ARGUMENT],
    ["--confirm",                GetoptLong::REQUIRED_ARGUMENT], # Only used for test automation
    ["--usage",                  GetoptLong::NO_ARGUMENT],
    ["--help",             "-h", GetoptLong::NO_ARGUMENT]
  )
  opts.each do |option_name, option_value|
    args[option_name] = option_value
  end
rescue GetoptLong::Error
  usage
end

usage if args["--help"]

# An option that was not given is absent from the hash (nil); the checks
# below therefore test for nil rather than for truthiness.
report_only        = args["--report-only"]
fix_ssh_keys       = args["--ssh-keys"]
fix_district_uids  = args["--district-uids"]
fix_consumed_gears = args["--consumed-gears"]
fix_removed_nodes  = args["--removed-nodes"]
fix_usage          = args["--usage"]

# At least one of the fix selectors must be present.
requested_fixes = [fix_ssh_keys, fix_district_uids, fix_consumed_gears,
                   fix_removed_nodes, fix_usage]
if requested_fixes.all?(&:nil?)
  puts "You must specify at least one item to fix."
  usage
end

# Load the broker environment; OPENSHIFT_BROKER_DIR overrides the default
# install location. AdminHelper is mixed into the top-level object so its
# helpers can be called as plain functions below.
require "#{ENV['OPENSHIFT_BROKER_DIR'] || '/var/www/openshift/broker'}/config/environment"
include AdminHelper

# AdminHelper sets $verbose to false by default, so setting the $verbose flag after loading it
$verbose = args["--verbose"]

# auto_confirm is tri-state:
#   - value of --confirm (converted with to_b) when that option is given
#   - false in report-only mode
#   - nil otherwise, in which case the prompts further down read the answer with gets
# NOTE(review): to_b is not core Ruby; presumably provided by the loaded environment.
if args["--confirm"]
  auto_confirm = args["--confirm"].to_b
elsif report_only
  auto_confirm = false
else
  auto_confirm = nil
end

# Disable analytics for admin scripts
Rails.configuration.analytics[:enabled] = false
# Override the message broker RPC discovery timeout and call timeout for this run
Rails.configuration.msg_broker[:rpc_options][:disctimeout] = 20
Rails.configuration.msg_broker[:rpc_options][:timeout] = 600

# Refund-related messages are written through the billing service logger to $log_file.
# NOTE(review): $billing_enabled is not assigned in this file; presumably set by
# AdminHelper or the broker environment - confirm.
$log_file = "/var/log/openshift/broker/usage-refund.log"
$billing_api = nil
if $billing_enabled
  $billing_api = OpenShift::BillingService.instance
  $billing_api.set_logger($log_file, false)
end

# On Ctrl-C report the interruption and terminate with a failure status
trap("INT") do
  print_message "#{$0} Interrupted", true
  exit 1
end

# Brings a user's stored consumed_gears value in line with the gear count
# reported by check_consumed_gears for that user.
#
# The comparison and the save are performed inside Lock.run_in_user_lock.
#
# user_id - _id of the CloudUser document to fix.
#
# Returns true when the check (and the update, if one was needed) completed,
# false when the user was not found or another error occurred. Failures are
# reported on stdout.
#
# SystemExit and signal exceptions are re-raised so that the INT trap's
# `exit 1` (or any other termination request) is not swallowed here.
def reset_consumed_gears(user_id)
  user = CloudUser.find_by(_id: user_id)
  Lock.run_in_user_lock(user) do
    user_consumed_gears, app_actual_gears = check_consumed_gears(user)
    if user_consumed_gears != app_actual_gears
      user.consumed_gears = app_actual_gears
      user.save!
    end
  end
  true
rescue SystemExit, SignalException
  raise
rescue Mongoid::Errors::DocumentNotFound
  puts "User #{user_id} not found."
  false
rescue Exception => e
  puts "Unable to set consumed gears for user #{user_id}, error: #{e.message}."
  false
end

# Sets "removed" => true on every gear whose server_identity is one of the
# given servers.
#
# The update is issued once per gear array position (gears.0, gears.1, ...)
# as a multi-document update, and continues until no application document
# has a gear at the next position.
#
# unresponsive_servers - list of server identities to match.
def mark_gear_removed(unresponsive_servers)
  datastore = OpenShift::DataStore.db(:primary)
  position = 0
  loop do
    selector = { "gears.#{position}.server_identity" => { "$in" => unresponsive_servers } }
    change = { '$set' => { "gears.#{position}.removed" => true } }
    datastore["applications"].update(selector, change, { :multi => true })

    # Move on to the next array slot; stop once no app has a gear there
    position += 1
    remaining = datastore["applications"].find({ "gears.#{position}" => { "$exists" => true } }).count
    break unless remaining > 0
  end
end

# Classifies the gears of an application by responsiveness and decides whether
# the application can be recovered in place.
#
# Gears that belong to app.group_instances[0] are tracked in the `framework`
# hash; gears of every other group instance are tracked in the `db` hash.
#
# app                  - application document to analyze (may be nil; then no
#                        gear is inspected).
# unresponsive_servers - list of server identities considered unresponsive.
#
# Returns [app_recoverable, framework, db] where
#   app_recoverable - true only when framework[:available] is true
#   framework       - { :available, :scaled, :unresponsive_gears,
#                       :responsive_gears, :backup_available }
#   db              - { :available, :unresponsive_gears, :responsive_gears,
#                       :backup_available, :remove_features }
#
# Raises Exception when a gear's `removed` flag disagrees with whether its
# server is in unresponsive_servers.
def analyze_app(app, unresponsive_servers)
  framework = { :available => false, :scaled =>false, :unresponsive_gears => [],
                :responsive_gears => [], :backup_available => false }
  db = { :available => false, :unresponsive_gears => [], :responsive_gears => [],
         :backup_available => false, :remove_features => [] }
  app_recoverable = false

  # Component instance whose cartridge is the web proxy (nil when there is none)
  ci = app.component_instances.detect{ |i| i.cartridge.is_web_proxy? }

  # Gather required info on this app
  app.gears.each do |gear|
    unless unresponsive_servers.include?(gear.server_identity)
      # Gear lives on a responsive server
      raise Exception.new "Server identity: #{gear.server_identity} not in unresponsive servers but 'removed' is set for Gear: #{gear._id.to_s}" if gear.removed
      if app.group_instances[0]._id == gear.group_instance_id
        # The framework counts as available when the gear carrying app_dns is
        # responsive or, for an HA app, when a responsive gear has the web
        # proxy component; any other responsive gear only marks it as scaled.
        if gear.app_dns
          framework[:available] = true
        elsif app.ha
          if gear.has_component?(ci)
            framework[:available] = true
          else
            framework[:scaled] = true
          end
        else
          framework[:scaled] = true
        end
        framework[:responsive_gears] << gear
      else #app.group_instances[0]._id != gear.group_instance_id
        db[:available] = true
        db[:responsive_gears] << gear
      end
    else
      # Gear lives on an unresponsive server
      raise Exception.new "Server identity: #{gear.server_identity} is in unresponsive servers but 'removed' is not set for Gear: #{gear._id.to_s}" if !gear.removed
      if app.group_instances[0]._id == gear.group_instance_id
        framework[:unresponsive_gears] << gear
      else
        db[:unresponsive_gears] << gear
      end
    end
  end if app and app.gears.present?

  if !framework[:available] and !db[:available]
    # Nothing usable except possibly a scaled framework gear to back up from
    framework[:backup_available] = true if framework[:scaled]
  elsif !framework[:available] and db[:available]
    db[:backup_available] = true
    framework[:backup_available] = true if framework[:scaled]
  elsif framework[:available]
    app_recoverable = true
    carts = []
    skip_gears = []
    # When every gear of a group instance is unresponsive, its cartridges are
    # listed for removal and its gears are dropped from the unresponsive list.
    db[:unresponsive_gears].each do |gear|
      if (gear.group_instance.gears - db[:unresponsive_gears]).blank?
        carts += gear.group_instance.all_component_instances.map { |ci| ci.cartridge_name }
        skip_gears << gear
      end
    end
    db[:remove_features] = carts.uniq
    db[:unresponsive_gears] -= skip_gears
  end

  return  app_recoverable, framework, db
end

# Destroys each application named in app_ids.
#
# app_reports - hash of app id => report array whose first element is the
#               application object.
# app_ids     - ids (keys of app_reports) of the applications to destroy.
#
# A failure for one application is reported through print_message and does not
# stop the remaining deletions. The failure message uses app_id directly, so
# an id with no report entry is reported instead of raising a second error
# from inside the rescue handler.
#
# SystemExit and signal exceptions are re-raised so the INT trap's `exit 1`
# is not swallowed.
def delete_apps(app_reports, app_ids)
  app_ids.each do |app_id|
    begin
      app = app_reports.fetch(app_id).first
      app.destroy_app
      puts "Application with id: #{app._id} deleted." if $verbose
    rescue SystemExit, SignalException
      raise
    rescue Exception => e
      print_message "Failed to delete application with id: #{app_id}, error: #{e.message}"
    end
  end
end

# Recovers an application in place after some of its gears were lost.
#
# Steps, in order:
#   1. remove the cartridges listed in db[:remove_features] (if any)
#   2. remove every gear in framework[:unresponsive_gears] and
#      db[:unresponsive_gears]
#   3. re-apply the gear/storage limits of the override that belongs to the
#      first component instance's group instance
#
# app       - application to fix.
# framework - hash with an :unresponsive_gears list.
# db        - hash with :unresponsive_gears and :remove_features lists.
#
# Errors are reported through print_message and not propagated, except for
# SystemExit and signal exceptions, which are re-raised so the INT trap's
# `exit 1` is honoured.
def recover_app(app, framework, db)
  begin
    unless db[:remove_features].blank?
      app.remove_cartridges(db[:remove_features])
      puts "Removed cartridges: #{db[:remove_features].join(',')} from application with id: #{app._id}." if $verbose
    end
    (framework[:unresponsive_gears] + db[:unresponsive_gears]).each { |gear| app.remove_gear(gear._id.to_s) }

    # Fix overrides for the app
    component_instance = app.component_instances[0]
    override = app.group_instances_with_overrides.detect { |i| i.instance == component_instance.group_instance }
    # Report a missing override explicitly rather than as a NoMethodError on nil
    raise "No group instance override found for the first component instance" if override.nil?
    result = app.update_component_limits(component_instance, override.min_gears, override.max_gears, override.additional_filesystem_gb)
    error_text = result.errorIO.string
    raise error_text unless error_text.empty?

    puts "Application with id: #{app._id} fixed." if $verbose
  rescue SystemExit, SignalException
    raise
  rescue Exception => e
    print_message "Failed to recover application with id: #{app._id}, error: #{e.message}"
  end
end

# Adds a missing begin or end UsageRecord for a gear and, for end records,
# closes the matching open Usage documents.
#
# gear_id       - id of the gear the record is for.
# usage_type    - one of the UsageRecord::USAGE_TYPES values.
# exists_in_app - true: the gear exists in an application, so a *begin* record
#                 is built from the application data ($app_gear_hash and the
#                 Application document);
#                 false: the gear no longer exists, so an *end* record is
#                 built from the latest UsageRecord for the gear.
# current_time  - time of this repair run; only used in the refund log line.
#
# Returns early (nil) without writing anything when the application or gear
# cannot be found, when there is nothing to bill for the usage type, when no
# previous usage record exists for a vanished gear, or when the latest record
# is already an end record.
#
# When $billing_enabled is set and the user has a usage account on a
# non-default plan, a line describing the corrected usage is written through
# $billing_api.print_info.
def create_usage_record(gear_id, usage_type, exists_in_app, current_time)
  user_id = nil
  app_name = nil
  event = nil
  gear_size = nil
  addtl_fs_gb = nil
  cart_name = nil
  time = nil
  if exists_in_app
    app_id = $app_gear_hash[gear_id]['app_id']
    begin
      app = Application.find_by(_id: app_id)
    rescue Mongoid::Errors::DocumentNotFound
      return
    end
    return unless app
    user_id = app.domain.owner._id
    app_name = app.name
    event = UsageRecord::EVENTS[:begin]
    # Reuse the begin time of a still-open Usage entry for the same app,
    # otherwise the record starts now
    usage = Usage.find_latest_by_user_gear(user_id, gear_id, usage_type)
    if usage and (usage.app_name == app_name) and usage.end_time.nil?
      time = usage.begin_time
    else
      time = Time.now.utc
    end
    if usage_type == UsageRecord::USAGE_TYPES[:gear_usage]
      gear = nil
      app.gears.each do |g|
        if g._id.to_s == gear_id.to_s
          gear = g
          break
        end
      end if app and app.gears.present?
      return unless gear
      gear_size = gear.group_instance.gear_size
    elsif usage_type == UsageRecord::USAGE_TYPES[:addtl_fs_gb]
      addtl_fs_gb = $app_gear_hash[gear_id]['addtl_fs_gb']
      return if addtl_fs_gb <= 0
    elsif usage_type == UsageRecord::USAGE_TYPES[:premium_cart]
      carts = $app_gear_hash[gear_id]['premium_carts']
      return if carts.size == 0
      cart_name = carts.first
    end
  else # !exists_in_app
    urec = UsageRecord.find_latest_by_gear(gear_id, usage_type)
    # Nothing to close when there is no record at all or it is already ended
    return if urec.nil? or urec.event == UsageRecord::EVENTS[:end]
    user_id = urec.user_id
    app_name = urec.app_name
    event = UsageRecord::EVENTS[:end]
    gear_size = urec.gear_size
    addtl_fs_gb = urec.addtl_fs_gb
    cart_name = urec.cart_name
    time = urec.time
  end

  # Only write the record when an identical one does not exist yet
  if UsageRecord.where(user_id: user_id, gear_id: gear_id, usage_type: usage_type,
                       app_name: app_name, time: time, event: event).count == 0
    urec = UsageRecord.new(user_id: user_id, gear_id: gear_id, usage_type: usage_type,
                           app_name: app_name, created_at: time,
                           time: time, event: event)
    urec.gear_size = gear_size if gear_size
    urec.addtl_fs_gb = addtl_fs_gb if addtl_fs_gb
    urec.cart_name = cart_name if cart_name
    urec.save!
    puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{usage_type}, added " +
         (exists_in_app ? "begin rec in usage_record" : "end rec in usage_record") if $verbose
    if event == UsageRecord::EVENTS[:end]
      # Close every Usage entry that is still open for this gear/app
      Usage.where(user_id: user_id, gear_id: gear_id, usage_type: usage_type,
                  app_name: app_name, end_time: nil).each do |usage|
        if usage
          usage.end_time = time
          usage.save!
          puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{usage_type}, set end time in usage" if $verbose
        end
      end

      # Check whether we need to provide refunds to the user
      if $billing_enabled
        user = CloudUser.find_by(_id: user_id)
        if user.usage_account_id.present? and
           (user.plan_id.to_s != Rails.configuration.billing[:default_plan].to_s)
          msg = "User: #{user_id}, Plan: #{user.plan_id}, UsageAccount: #{user.usage_account_id}, "
          if gear_size
            msg += "Gear: #{gear_size},"
          elsif addtl_fs_gb
            msg += "Storage: #{addtl_fs_gb},"
          elsif cart_name
            msg += "Premium-Cart: #{cart_name},"
          end
          msg += " Start: #{time}, Fixed: #{current_time}"
          $billing_api.print_info msg
        end
      end
    end
  end
end

# Builds a map of every known server identity to the name of the district it
# belongs to.
#
# Servers listed in $district_hash are mapped to their district's name (or
# 'NONE' when the district has no name). Server identities that only appear on
# gears in $datastore_hash are added with 'NONE'.
#
# Returns a Hash of server identity => district name.
def populate_available_servers
  servers = {}

  # Servers registered in a district
  $district_hash.each do |_district_id, district|
    district["server_names"].each do |identity|
      servers[identity] = (district['name'] || 'NONE')
    end
  end

  # Servers referenced by gears but not part of any district
  $datastore_hash.each do |_gear_uuid, gear|
    identity = gear['server_identity']
    next unless identity.present?
    servers[identity] ||= 'NONE'
  end

  servers
end

# Determines which of the known servers did not answer discovery and asks the
# admin to confirm each one.
#
# available_servers - hash of server identity => district name.
# auto_confirm      - true: every server is confirmed without prompting;
#                     false: no server is confirmed;
#                     nil: the answer for each server is read with gets.
#
# Returns the list of confirmed unresponsive server identities. When at least
# one server failed discovery, the confirmed list is also reported through
# print_message.
def find_unresponsive_servers(available_servers, auto_confirm)
  proxy = OpenShift::MCollectiveApplicationContainerProxy
  rpc_opts = proxy.rpc_options
  # A non-nil auto_confirm (used for test automation) gets the short discovery timeout
  rpc_opts[:disctimeout] = auto_confirm.nil? ? 20 : 5
  reachable = proxy.known_server_identities(true, rpc_opts)
  unresponsive_servers = available_servers.keys - reachable
  return unresponsive_servers if unresponsive_servers.blank?

  # Report unresponsive servers to the admin
  puts "Checking for unresponsive servers...FAIL" if $verbose
  puts "Servers that are unresponsive:"
  # Iterate over a copy because unconfirmed servers are removed from the list
  unresponsive_servers.dup.each do |server|
    print "\tServer: #{server} (district: #{available_servers[server]}), Confirm [yes/no]: "
    confirmed = auto_confirm || (auto_confirm.nil? && gets.chomp.to_b)
    unresponsive_servers.delete(server) unless confirmed
    puts ""
  end
  print_message "Some servers are unresponsive: #{unresponsive_servers.join(', ')}"
  unresponsive_servers
end

# Fixes the consumed gears count of each given user via reset_consumed_gears.
#
# user_ids - ids of the users whose consumed gears count is inconsistent.
#
# Every user that could not be fixed is reported through print_message, and a
# summary with the number of failed users is printed at the end.
#
# SystemExit and signal exceptions are re-raised so the INT trap's `exit 1`
# stops the run instead of being counted as a per-user failure.
def repair_consumed_gears(user_ids)
  return if user_ids.blank?
  failed_count = 0
  puts "Fixing consumed gears count for all affected users" if $verbose
  user_ids.each do |user_id|
    begin
      unless reset_consumed_gears(user_id)
        print_message "Failed to fix consumed gears mismatches for user '#{user_id}'"
        failed_count += 1
      end
    rescue SystemExit, SignalException
      raise
    rescue Mongoid::Errors::DocumentNotFound
      # The user no longer exists; nothing left to fix
    rescue Exception => ex
      print_message "Failed to fix consumed gears mismatches for user '#{user_id}': #{ex.message}"
      failed_count += 1
    end
  end
  # The counter tracks users, so the summary says "users"
  puts "Failed to fix consumed gears mismatches for #{failed_count} users." if failed_count > 0
end

# Re-synchronises the gear ssh keys of each given application by calling
# fix_gear_ssh_keys on it.
#
# app_ids - ids of the applications to fix.
#
# Applications that no longer exist are skipped silently. Any other failure is
# reported through print_message and counted; the count is printed at the end.
#
# SystemExit and signal exceptions are re-raised so the INT trap's `exit 1`
# stops the run instead of being counted as a per-application failure.
def repair_ssh_keys(app_ids)
  return if app_ids.blank?
  failed_count = 0
  puts "Fixing ssh key inconsistencies for all affected applications" if $verbose
  app_ids.each do |app_id|
    begin
      app = Application.find(app_id)
      app.fix_gear_ssh_keys()
    rescue SystemExit, SignalException
      raise
    rescue Mongoid::Errors::DocumentNotFound
      # The application was deleted in the meantime; nothing to fix
    rescue Exception => ex
      print_message "Failed to fix ssh key mismatches for application '#{app_id}': #{ex.message}"
      failed_count += 1
    end
  end
  puts "Failed to fix ssh key mismatches for #{failed_count} applications." if failed_count > 0
end

# Removes ssh keys and environment variables whose component_id no longer
# refers to an existing component instance or gear.
#
# For every domain in domain_ids:
#   - application ssh keys whose component_id is not a component instance or
#     gear of that application are pulled from the application document
#   - domain environment variables and domain system ssh keys whose
#     component_id matches no component instance or gear of any application in
#     the domain are pulled from the domain document
#   - repair_ssh_keys is then run for the applications affected in this domain
#     (all applications of the domain when a domain-level key was removed)
#
# domain_ids - ids of the domains to clean up.
#
# Domains that no longer exist are skipped. Other failures are reported through
# print_message and counted; the number of failed domains is printed at the
# end. SystemExit and signal exceptions are re-raised so the INT trap's
# `exit 1` is not swallowed.
def repair_stale_keys_vars(domain_ids)
  return if domain_ids.blank?
  failed_count = 0
  puts "Fixing stale ssh keys and environment variables for all affected domains" if $verbose
  domain_ids.each do |domain_id|
    begin
      # Collected per domain, so applications of previously processed domains
      # are not repaired again
      stale_ssh_key_app_ids = []
      domain_ref_ids = []
      env_vars_to_rm = []
      system_ssh_keys_to_rm = []
      domain = Domain.find(domain_id)
      domain.applications.each do |app|
        app_ref_ids = []
        app_ref_ids |= app.component_instances.map {|ci| ci._id.to_s}
        app_ref_ids |= app.gears.map {|g| g._id.to_s}

        # fix stale application ssh keys
        app_ssh_keys_to_rm = []
        app.app_ssh_keys.each do |key|
          app_ssh_keys_to_rm << key.as_document unless app_ref_ids.include? key.component_id.to_s
        end

        if app_ssh_keys_to_rm.present?
          Application.where(_id: app.id).update_all({ "$pullAll" => { app_ssh_keys: app_ssh_keys_to_rm }})

          # if an ssh key was removed, then include the application in the cleanup to remove mongo/node inconsistencies
          stale_ssh_key_app_ids << app.id.to_s
        end

        domain_ref_ids |= app_ref_ids
      end

      # fix stale environment variables
      domain.env_vars.each do |ev|
        env_vars_to_rm << ev.dup unless domain_ref_ids.include? ev["component_id"].to_s
      end
      Domain.where(_id: domain.id).update_all({ "$pullAll" => { env_vars: env_vars_to_rm }}) if env_vars_to_rm.present?

      # fix stale domain ssh keys
      domain.system_ssh_keys.each do |key|
        system_ssh_keys_to_rm << key.as_document unless domain_ref_ids.include? key.component_id.to_s
      end

      if system_ssh_keys_to_rm.present?
        Domain.where(_id: domain.id).update_all({ "$pullAll" => { system_ssh_keys: system_ssh_keys_to_rm }})

        # if a domain ssh key was removed, then all applications within the domain should be included in the cleanup
        stale_ssh_key_app_ids = domain.applications.map(&:_id).map(&:to_s)
      end

      # Now fix ssh key mismatches that have been created as a result of cleaning out the stale ssh keys from mongo
      repair_ssh_keys(stale_ssh_key_app_ids)

    rescue SystemExit, SignalException
      raise
    rescue Mongoid::Errors::DocumentNotFound
      # The domain was deleted in the meantime; nothing to clean up
    rescue Exception => ex
      print_message "Failed to fix stale variables/sshkeys for domain '#{domain_id}': #{ex.message}"
      print_message ex.backtrace
      failed_count += 1
    end
  end
  # The counter tracks domains, so the summary says "domains"
  puts "Failed to fix stale variables/sshkeys for #{failed_count} domains." if failed_count > 0
end

# Reconciles the UIDs reserved in each district with the UIDs used by gears.
#
# unreserved_dist_map - hash of district uuid => gear UIDs that are in use by
#                       a gear but not reserved in the district. Each UID is
#                       reserved (inside the owning application's lock) if an
#                       application on one of the district's servers still
#                       has a gear with that UID.
# unused_dist_map     - hash of district uuid => UIDs that are reserved but
#                       appear unused. Each UID is unreserved unless
#                       datastore_has_gear_uid? finds a gear using it.
#
# District details are read from $district_hash. Progress summaries are
# printed in verbose mode; reservation failures are always reported. The
# "unused" summary counts the UIDs that were actually unreserved.
def repair_district_uids(unreserved_dist_map, unused_dist_map)
  return if unreserved_dist_map.blank? and unused_dist_map.blank?
  fixed_unreserved_count = 0
  failed_unreserved_count = 0
  fixed_unused_count = 0
  puts "Fixing unreserved district UIDs for all affected districts" if $verbose
  unreserved_dist_map.each do |district_uuid, uids|
    uids.each do |gear_uid|
      district_info = $district_hash[district_uuid]
      si_list = district_info['server_names']
      query = {"gears" => {"$elemMatch" => { "uid" => gear_uid, "server_identity" => {"$in" => si_list}}}}
      begin
        app = Application.find_by(query)
        Lock.run_in_app_lock(app) do
          # check if this app still has the gear UID
          verify_query = {"_id" => app._id}
          verify_query.merge! query
          if Application.where(verify_query).count > 0
            reserved_uid = District.reserve_uid(district_uuid, gear_uid)
            if reserved_uid.nil?
              print_message "Failed to reserve UID #{gear_uid} within district #{district_info['name']}"
              failed_unreserved_count += 1
            else
              fixed_unreserved_count += 1
            end
          end
        end
      rescue Mongoid::Errors::DocumentNotFound
        # No application uses this UID any more; nothing to reserve
      end
    end
  end
  puts "Fixed #{fixed_unreserved_count} unreserved UIDs across all districts." if $verbose and (fixed_unreserved_count > 0)
  puts "Failed to fix #{failed_unreserved_count} unreserved UIDs across all districts." if failed_unreserved_count > 0

  puts "Fixing unused district UIDs for all affected districts" if $verbose
  unused_dist_map.each do |district_uuid, uids|
    uids.each do |gear_uid|
      district_info = $district_hash[district_uuid]
      # Re-check right before unreserving: a gear may have claimed the UID meanwhile
      unless datastore_has_gear_uid?(gear_uid, district_info['server_names'])
        District.unreserve_uid(district_uuid, gear_uid)
        fixed_unused_count += 1
      end
    end
  end
  puts "Fixed #{fixed_unused_count} unused UIDs across all districts." if $verbose and (fixed_unused_count > 0)
end

# Repairs inconsistencies between applications, the usage_record collection
# (UsageRecord) and the usage collection (Usage).
#
# All arguments are lists of gear ids:
#   gear_app_gear_ids         - gear usage: a begin record is created
#   gear_urec_gear_ids        - gear usage: an end record is created
#   storage_app_gear_ids      - additional storage: a begin record is created
#   storage_urec_gear_ids     - additional storage: an end record is created
#   storage_mismatch_gear_ids - additional storage amount in open UsageRecord /
#                               Usage entries is overwritten with the value
#                               from $app_gear_hash
#   cart_app_gear_ids         - premium cartridge: a begin record is created
#   cart_urec_gear_ids        - premium cartridge: an end record is created
#   gear_ids                  - open Usage entries are ended (when no
#                               UsageRecord matches) or deleted (when
#                               duplicated)
#   urec_gear_ids             - a Usage entry is created for each non-end
#                               UsageRecord that has none
#
# Begin records are created with create_usage_record(..., true, ...) and end
# records with create_usage_record(..., false, ...).
#
# Returns immediately when every list is empty.
def repair_usage(gear_app_gear_ids, gear_urec_gear_ids, storage_app_gear_ids, 
                 storage_urec_gear_ids, storage_mismatch_gear_ids, cart_app_gear_ids,
                 cart_urec_gear_ids, gear_ids, urec_gear_ids)
  return if (gear_app_gear_ids + gear_urec_gear_ids + storage_app_gear_ids + storage_urec_gear_ids +
             storage_mismatch_gear_ids + cart_app_gear_ids + cart_urec_gear_ids + gear_ids + urec_gear_ids).blank?
  puts "Fixing usage inconsistencies" if $verbose
  # One timestamp for the whole run; passed to every create_usage_record call
  current_time = Time.now.utc
  puts "************"
  puts "Check file: #{$log_file} for info on refunds to the user because of incorrect usage billing." if $billing_enabled
  puts "************"
  # Missing begin/end usage records per usage type
  gear_app_gear_ids.each do |gear_id|
    create_usage_record(gear_id, UsageRecord::USAGE_TYPES[:gear_usage], true, current_time)
  end if gear_app_gear_ids.present?

  gear_urec_gear_ids.each do |gear_id|
    create_usage_record(gear_id, UsageRecord::USAGE_TYPES[:gear_usage], false, current_time)
  end if gear_urec_gear_ids.present?

  storage_app_gear_ids.each do |gear_id|
    create_usage_record(gear_id, UsageRecord::USAGE_TYPES[:addtl_fs_gb], true, current_time)
  end if storage_app_gear_ids.present?

  storage_urec_gear_ids.each do |gear_id|
    create_usage_record(gear_id, UsageRecord::USAGE_TYPES[:addtl_fs_gb], false, current_time)
  end if storage_urec_gear_ids.present?

  cart_app_gear_ids.each do |gear_id|
    create_usage_record(gear_id, UsageRecord::USAGE_TYPES[:premium_cart], true, current_time)
  end if cart_app_gear_ids.present?

  cart_urec_gear_ids.each do |gear_id|
    create_usage_record(gear_id, UsageRecord::USAGE_TYPES[:premium_cart], false, current_time)
  end if cart_urec_gear_ids.present?

  # Storage amount mismatch: overwrite the amount in records that are still
  # open (non-end usage records, usage entries without end_time) with the
  # value from $app_gear_hash, provided that value is positive
  storage_mismatch_gear_ids.each do |gear_id|
    UsageRecord.where({'gear_id' => Moped::BSON::ObjectId(gear_id.to_s), 'usage_type' => UsageRecord::USAGE_TYPES[:addtl_fs_gb]}).each do |urec|
      next if urec.event == UsageRecord::EVENTS[:end]
      addtl_fs_gb = $app_gear_hash[gear_id]['addtl_fs_gb']
      if (addtl_fs_gb > 0) and (urec.addtl_fs_gb != addtl_fs_gb)
        urec.addtl_fs_gb = addtl_fs_gb
        urec.save!
        puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{urec.usage_type}, rectified storage mismatch in usage_record" if $verbose
      end
    end
    Usage.where({'gear_id' => Moped::BSON::ObjectId(gear_id.to_s), 'usage_type' => UsageRecord::USAGE_TYPES[:addtl_fs_gb]}).each do |usage|
      next if usage.end_time
      addtl_fs_gb = $app_gear_hash[gear_id]['addtl_fs_gb']
      if (addtl_fs_gb > 0) and (usage.addtl_fs_gb != addtl_fs_gb)
        usage.addtl_fs_gb = addtl_fs_gb
        usage.save!
        puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{usage.usage_type}, rectified storage mismatch in usage" if $verbose
      end
    end 
  end if storage_mismatch_gear_ids.present?

  # Non-end usage records without a Usage entry: create the Usage entry from
  # the record (matched on user, gear, type, app name and created_at)
  urec_gear_ids.each do |gear_id|
    UsageRecord.where({'gear_id' => Moped::BSON::ObjectId(gear_id.to_s)}).each do |urec|
      next if urec.event == UsageRecord::EVENTS[:end]
      if Usage.where(user_id: urec.user_id, gear_id: gear_id, usage_type: urec.usage_type,
                     app_name: urec.app_name, created_at: urec.created_at).count == 0
        usage = Usage.new(user_id: urec.user_id, gear_id: gear_id, usage_type: urec.usage_type,
                          app_name: urec.app_name, created_at: urec.created_at,
                          begin_time: urec.time)
        usage.gear_size = urec.gear_size if urec.gear_size
        usage.addtl_fs_gb = urec.addtl_fs_gb if urec.addtl_fs_gb
        usage.cart_name = urec.cart_name if urec.cart_name
        usage.save!
        puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{urec.usage_type}, add begin record in usage" if $verbose
      end
    end
  end if urec_gear_ids.present?

  # Open Usage entries: end them at their begin time when no usage record
  # matches at all, or delete them while more than one open entry exists for
  # the same user/gear/type/app
  gear_ids.each do |gear_id|
    Usage.where({'gear_id' => Moped::BSON::ObjectId(gear_id.to_s)}).each do |usage|
      next if usage.end_time
      if UsageRecord.where(user_id: usage.user_id, gear_id: gear_id, usage_type: usage.usage_type,
                           app_name: usage.app_name).count == 0
        usage.end_time = usage.begin_time
        usage.save!
        puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{usage.usage_type}, added end time in usage" if $verbose
      elsif Usage.where(user_id: usage.user_id, gear_id: gear_id, usage_type: usage.usage_type,
                        app_name: usage.app_name, end_time: nil).count > 1
        usage.delete
        puts "Fixed usage for gear_id: #{gear_id}, usage_type: #{usage.usage_type}, deleted duplicate record in usage" if $verbose
      end
    end  
  end if gear_ids.present?
end

# Cleans up after servers (nodes) that are down or decommissioned.
#
# Phases, in order:
#   1. flag each unresponsive server as unresponsive in its district
#   2. mark every gear hosted on those servers as removed (exits the script
#      with status 1 if that fails)
#   3. unscalable apps with gears on those servers: offer to delete them
#   4. scalable apps: analyze each with analyze_app, then
#        - not recoverable, no backup available: offer to delete
#        - not recoverable, backup available: offer to skip (otherwise delete)
#        - recoverable but cartridges must be removed: offer to fix
#        - remaining recoverable apps: fixed without prompting
#   5. offer to remove the unresponsive servers that no application references
#      any more from their districts
#
# available_servers    - hash of server identity => district name ('NONE' for
#                        servers outside any district).
# unresponsive_servers - list of confirmed unresponsive server identities.
# auto_confirm         - true: every prompt is answered without reading input;
#                        false: every prompt is declined;
#                        nil: each answer is read with gets.
#
# NOTE(review): the `rescue Exception` handlers in this method also catch the
# SystemExit raised by the script's INT trap (`exit 1`), so an interrupt that
# arrives inside one of these begin blocks is reported as an ordinary failure
# and processing continues - confirm whether that is intended.
def repair_removed_nodes(available_servers, unresponsive_servers, auto_confirm)
  return if unresponsive_servers.blank?

  # Entries are [server, already_deactivated, district]
  servers_to_delete = []
  unless (available_servers.values.uniq - ['NONE']).blank?
    # Set unresponsive flag for unresponsive-servers in their respective districts
    unresponsive_servers.each do |server|
      if available_servers[server] != 'NONE'
        begin
          deactivated = false
          district = District.find_by_name(available_servers[server])
          district.servers.each do |si|
            next if si.name != server
            # Remember whether the node was already inactive so it is not deactivated again later
            deactivated = true unless si.active
            res = District.where(:_id => district._id, "servers.name" => server).update({"$set" => {"servers.$.unresponsive" => true}})
            raise OpenShift::OOException.new("Could not set unresponsive to true for #{si.name}") if res.nil? or !res["updatedExisting"]
            break
          end
          # reload district object to reflect the above mongo updates
          district.reload
          servers_to_delete << [server, deactivated, district]
        rescue Exception => e
          puts "Unable to set unresponsive flag for server: #{server} from district: #{available_servers[server]}, error: #{e.message}"
        end
      end
    end
    puts ""
  end

  # Mark removed = true on all gears that are unresponsive
  begin
    mark_gear_removed(unresponsive_servers)
  rescue Exception => e
    print_message "Failed marking 'removed' field in gear, error: #{e.message}"
    puts e.backtrace.inspect
    exit 1
  end

  # Find all apps that are unresponsive
  query = {"gears.server_identity" => {"$in" => unresponsive_servers}}
  selection = {:fields => ["name", "scalable"], :timeout => false}
  unresponsive_scalable_apps = []
  unresponsive_unscalable_apps = []
  OpenShift::DataStore.find(:applications, query, selection) do |app|
    app_info = {'_id' => app['_id'], 'name' => app['name'] }
    if app['scalable']
      unresponsive_scalable_apps << app_info
    else
      unresponsive_unscalable_apps << app_info
    end
  end

  unless unresponsive_unscalable_apps.blank?
    # Report unresponsive unscalable apps
    puts "Found #{unresponsive_unscalable_apps.size} unresponsive unscalable apps:"
    unresponsive_unscalable_apps.each { |ai| puts "#{ai['name']} (id: #{ai['_id']})" }
    puts ""
    print "These apps can not be recovered. Do you want to delete all of them [yes/no]: "
    if auto_confirm or (auto_confirm.nil? and gets.chomp.to_b)
      unresponsive_unscalable_apps.each do |app_info|
        begin
          Application.find_by(_id: Moped::BSON::ObjectId(app_info['_id'].to_s)).destroy_app
          puts "Application with id: #{app_info['_id']} deleted." if $verbose
        rescue Exception => e
          print_message "Unable to delete application with id: #{app_info['_id']}, error: #{e.message}"
        end
      end
      puts "Finished deleting unresponsive unscalable apps." if $verbose
    else
      puts "Skipped deleting unresponsive unscalable apps." if $verbose
    end
    puts ""
  end 

  unless unresponsive_scalable_apps.blank?
    # Analyze scalable apps
    # app_reports maps app id (string) => [app, recoverable, framework, db];
    # each category handled below removes its apps from this hash
    app_reports = {}
    unresponsive_scalable_apps.each do |app_info|
      begin
        app_id = app_info['_id'].to_s
        app = Application.find_by(_id: Moped::BSON::ObjectId(app_id))
        recoverable, framework, db = analyze_app(app, unresponsive_servers)
        app_reports[app_id] = [app, recoverable, framework, db]
      rescue Exception => e
        print_message "Unable to analyze application with id: #{app_info['_id']}, error: #{e.message}"
      end
    end

    # Category 1: not recoverable and nothing to back up from
    apps_not_recoverable = []
    app_reports.each do |app_id, app_info|
      app, recoverable, framework, db = app_info[0], app_info[1], app_info[2], app_info[3]
      if !recoverable and !framework[:backup_available] and !db[:backup_available]
        apps_not_recoverable << app_id
      end 
    end
    unless apps_not_recoverable.blank?
      puts "Found #{apps_not_recoverable.size} unresponsive scalable apps that can not be recovered."
      apps_not_recoverable.each { |app_id| puts "#{app_reports[app_id][0].name} (id: #{app_id})" }
      puts ""
      print "Do you want to delete all of them [yes/no]: "
      if auto_confirm or (auto_confirm.nil? and gets.chomp.to_b)
        delete_apps(app_reports, apps_not_recoverable)
      end
      app_reports.delete_if { |k,v| apps_not_recoverable.include?(k) }
      puts ""
    end

    # Category 2: not recoverable, but responsive gears remain to back up from
    apps_to_backup = []
    app_reports.each do |app_id, app_info|
      app, recoverable, framework, db = app_info[0], app_info[1], app_info[2], app_info[3]
      if !recoverable and (framework[:backup_available] or db[:backup_available])
        apps_to_backup << app_id
      end
    end
    unless apps_to_backup.blank?
      puts "Found #{apps_to_backup.size} unresponsive scalable apps that can not be recovered but framework/db backup available."
      apps_to_backup.each do |app_id|
        app_info = app_reports[app_id]
        gear_ids = []
        (app_info[2][:responsive_gears] + app_info[3][:responsive_gears]).each { |gear| gear_ids << gear._id.to_s }
        puts "#{app_info[0].name} (id: #{app_id}, backup-gears: #{gear_ids.join(', ')})"
      end
      puts ""
      print "Do you want to skip all of them [yes/no]:(Warning: entering 'no' will delete the apps) "
      # Inverted prompt: the apps are deleted when auto_confirm is true, or when
      # the interactive answer is not a "yes"
      if auto_confirm or (auto_confirm.nil? and !gets.chomp.to_b)
        delete_apps(app_reports, apps_to_backup)
      end
      app_reports.delete_if { |k,v| apps_to_backup.include?(k) }
      puts ""
    end

    # Category 3: recoverable, but only after removing some cartridges
    apps_to_remove_features = []
    app_reports.each do |app_id, app_info|
      app, recoverable, framework, db = app_info[0], app_info[1], app_info[2], app_info[3]
      if recoverable and !db[:remove_features].blank?
        apps_to_remove_features << app_id
      end 
    end
    unless apps_to_remove_features.blank?
      puts "Found #{apps_to_remove_features.size} unresponsive scalable apps that are recoverable but some features/carts need to be removed."
      apps_to_remove_features.each do |app_id|
        puts "#{app_reports[app_id][0].name} (id: #{app_id} features-to-remove: #{app_reports[app_id][3][:remove_features].join(', ')})"
      end
      puts ""
      print "Do you want to fix all of them [yes/no]:(Warning: entering 'yes' will remove features from apps) "
      if auto_confirm or (auto_confirm.nil? and gets.chomp.to_b)
        apps_to_remove_features.each do |app_id|
          app_info = app_reports[app_id]
          app, recoverable, framework, db = app_info[0], app_info[1], app_info[2], app_info[3]
          recover_app(app, framework, db)
        end
      end
      app_reports.delete_if { |k,v| apps_to_remove_features.include?(k) }
      puts ""
    end

    # Category 4: whatever is left is expected to be recoverable as-is and is
    # fixed without asking
    unless app_reports.blank?
      puts "Found #{app_reports.size} unresponsive scalable apps that are recoverable."
      app_reports.each { |app_id, app_info| puts "#{app_info[0].name} (id: #{app_id})" }
      puts ""
      app_reports.each do |app_id, app_info|
        app, recoverable, framework, db = app_info[0], app_info[1], app_info[2], app_info[3]
        if recoverable
          recover_app(app, framework, db)
        else
          print_message "Error: Found app that is not recoverable but not processed. Details framework: #{framework.inspect}, db: #{db.inspect}"
        end
      end
      puts ""
    end
    puts "Finished fixing/deleting unresponsive scalable apps." if $verbose
  end

  # Skip servers if any apps are still referencing it
  servers_to_delete.delete_if {|server, _, _| Application.where({"gears.server_identity" => server}).exists? }
  unless servers_to_delete.blank?
    puts ""
    print "Do you want to delete unresponsive servers from their respective districts [yes/no]: "
    if auto_confirm or (auto_confirm.nil? and gets.chomp.to_b)
      # Remove unresponsive servers from their respective districts
      servers_to_delete.each do |server, deactivated, district|
        begin
          # The node is deactivated first unless it was already inactive
          district.deactivate_node(server) unless deactivated
          district.remove_node(server)
          puts "Server: #{server} removed from district: #{district.name}" if $verbose
        rescue Exception => e
          puts "Unable to delete server: #{server} from district: #{district.name}, error: #{e.message}"
        end
      end
      puts "Finished deleting unresponsive servers from their respective districts." if $verbose
    else
      puts "Skipped deleting unresponsive servers from their respective districts." if $verbose
    end
  end
end

# ---- Main sequence: detect inconsistencies, then repair them ----

current_time = Time.now.utc
puts "Started at: #{current_time}"
# Start time in milliseconds, used for the total run time printed at the end
start_time = (current_time.to_f * 1000).to_i

# Flags selecting which data the populate_* helpers collect.
# NOTE(review): these globals are not read in this file; presumably consumed by
# the AdminHelper populate/find helpers - confirm.
$chk_gear_mongo_node = true if fix_ssh_keys or fix_district_uids or fix_removed_nodes
$chk_district = true if fix_district_uids or fix_removed_nodes
$chk_usage = true if fix_usage

# Helpers not defined in this file (presumably from AdminHelper)
populate_user_hash
populate_domain_hash
populate_district_hash
populate_app_hash

# Detection phase: each result list/map defaults to empty and is only filled
# in when the corresponding fix was requested on the command line
consumed_gears_user_ids = []
consumed_gears_user_ids = run :find_consumed_gears_inconsistencies if fix_consumed_gears

ssh_keys_app_ids = []
stale_keys_vars_domain_ids = []
ssh_keys_app_ids = run :find_ssh_key_inconsistencies if fix_ssh_keys
stale_keys_vars_domain_ids = run :find_stale_sshkeys_and_envvars if fix_ssh_keys

unreserved_district_uid_map = {}
unused_district_uid_map = {}
unreserved_district_uid_map, unused_district_uid_map = run :find_district_inconsistencies if fix_district_uids

usage_gear_app_gear_ids = []
usage_gear_urec_gear_ids = []
usage_storage_app_gear_ids = []
usage_storage_urec_gear_ids = []
usage_storage_mismatch_gear_ids = []
usage_cart_app_gear_ids = []
usage_cart_urec_gear_ids = []
usage_gear_ids = []
usage_urec_gear_ids = []
if fix_usage
  populate_usage_hash
  usage_gear_app_gear_ids, usage_gear_urec_gear_ids = run :find_app_gear_usage_record_inconsistencies
  usage_storage_app_gear_ids, usage_storage_urec_gear_ids, usage_storage_mismatch_gear_ids = run :find_app_storage_usage_record_inconsistencies
  usage_cart_app_gear_ids, usage_cart_urec_gear_ids = run :find_app_premium_cart_usage_record_inconsistencies
  usage_gear_ids, usage_urec_gear_ids = run :find_usage_record_usage_inconsistencies
end

unresponsive_servers = []
available_servers = {}
if fix_removed_nodes
  available_servers = populate_available_servers
  unresponsive_servers = run :find_unresponsive_servers, available_servers, auto_confirm
end

print_message "", true
# The error counter is reset after detection, so the final status below is
# based on what happens from here on.
# NOTE(review): $total_errors is not incremented in this file; presumably
# updated by the AdminHelper helpers - confirm.
$total_errors = 0

# Repair phase: skipped entirely in report-only mode
unless report_only
  run :repair_consumed_gears, consumed_gears_user_ids
  run :repair_ssh_keys, ssh_keys_app_ids
  run :repair_stale_keys_vars, stale_keys_vars_domain_ids
  run :repair_district_uids, unreserved_district_uid_map, unused_district_uid_map
  run :repair_usage, usage_gear_app_gear_ids, usage_gear_urec_gear_ids,
                    usage_storage_app_gear_ids, usage_storage_urec_gear_ids,
                    usage_storage_mismatch_gear_ids, usage_cart_app_gear_ids,
                    usage_cart_urec_gear_ids, usage_gear_ids, usage_urec_gear_ids
  run :repair_removed_nodes, available_servers, unresponsive_servers, auto_confirm
end

end_time = Time.now.utc
puts "\nFinished at: #{end_time}"
total_time = (end_time.to_f * 1000).to_i - start_time
puts "Total time: #{total_time.to_f/1000}s"
# Exit status: 0 when no errors were counted, 1 otherwise
if $total_errors == 0
  print_message "SUCCESS", true
  errcode = 0
else
  print_message "FAILED", true
  errcode = 1
end
exit errcode
