#!/usr/bin/env oo-ruby

#--
# Copyright 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

require 'rubygems'
require 'getoptlong'
require 'time'

# Prints the tool synopsis and the option reference to stdout, then
# terminates the process with exit status 255. Never returns to the caller.
def usage
  help_text = <<USAGE
== Synopsis

#{$PROGRAM_NAME}:  Utility to check and fix various inconsistencies in mongo data

The following issues can be fixed with this script:
  - mismatch between user's consumed gears and actual gears across all domains/applications for the user
  - mismatch between the ssh keys in mongo and the ssh keys on the gear on the node
  - mismatch in available UIDs for a district and actual UIDs used by the gears on the nodes within the district

== Usage

#{$PROGRAM_NAME} OPTIONS

Options:
-v|--verbose
    Print information about each check being performed
-r|--report-only
    Only report the mismatches, don't fix them#{' '}
--consumed-gears
    Fix  mismatch in user's consumed gears vs actual gears in mongo
--ssh-keys
    Fix  mismatch in SSH keys between mongo and on the node for a gear
--district-uids
    Fix mismatch in available UIDs for a district in mongo
-h|--help
    Show Usage info
USAGE
  puts help_text
  exit 255
end

# Parse the command line into a hash keyed by long option name. Every option
# is a plain switch (no argument). When GetoptLong rejects the command line
# the usage text is printed, which also ends the process.
args = {}
begin
  opts = GetoptLong.new(
    *[
      ["--verbose", "-v"],
      ["--report-only", "-r"],
      ["--ssh-keys"],
      ["--district-uids"],
      ["--consumed-gears"],
      ["--help", "-h"]
    ].map { |names| names + [GetoptLong::NO_ARGUMENT] }
  )
  opts.each { |name, value| args[name] = value }
rescue GetoptLong::Error
  usage
end

# --help wins over everything else; usage does not return
usage if args["--help"]

# A switch that was not given stays nil in these locals
report_only        = args["--report-only"]
fix_ssh_keys       = args["--ssh-keys"]
fix_district_uids  = args["--district-uids"]
fix_consumed_gears = args["--consumed-gears"]
verbose            = args["--verbose"]

# At least one of the three repair areas has to be selected
if [fix_ssh_keys, fix_district_uids, fix_consumed_gears].all?(&:nil?)
  puts "You must specify at least one item to fix."
  usage
end

# Load the broker application environment; the Rails configuration and the
# model/helper classes used below come from it.
require "/var/www/openshift/broker/config/environment"
# Disable analytics for admin scripts
Rails.configuration.analytics[:enabled] = false
# Override the message broker RPC timeouts for this run
# NOTE(review): values are presumably in seconds — confirm against the msg_broker plugin
rpc_options = Rails.configuration.msg_broker[:rpc_options]
rpc_options[:disctimeout] = 20
rpc_options[:timeout] = 600

# Tries to claim the lock document of a user, provided the document records
# no application locks ("app_ids" is an empty hash) and is either unlocked
# or its stored timeout lies in the past.
#
# user_id - id of the user whose lock document is claimed
# timeout - value added to the current epoch seconds to compute the expiry
#           stored in the lock document (default 30)
#
# Returns true when a lock document was updated, false when none matched or
# when Moped raised an OperationFailure.
def lock_user_without_app(user_id, timeout=30)
  current_epoch = Time.now.to_i
  # make sure a lock document exists for this user before trying to claim it
  Lock.with(consistency: :strong).find_or_create_by( :user_id => user_id )
  free_or_expired = [{:locked => false}, {:timeout.lt => current_epoch}]
  claimable = {:user_id => user_id, "$or" => free_or_expired, "app_ids" => {}}
  claim = {"$set" => { locked: true, timeout: (current_epoch + timeout) }}
  claimed = Lock.with(consistency: :strong).where(claimable).find_and_modify(claim, new: true)
  !claimed.nil?
rescue Moped::Errors::OperationFailure
  false
end
  
# Releases the lock document of a user by setting its "locked" flag to false.
#
# user_id - id of the user whose lock document is released
#
# Returns true when a locked document for the user was found and updated,
# false when none matched or when Moped raised an OperationFailure.
def unlock_user_without_app(user_id)
  held = {:user_id => user_id, :locked => true}
  release = {"$set" => { "locked" => false }}
  released = Lock.with(consistency: :strong).where(held).find_and_modify(release, new: true)
  !released.nil?
rescue Moped::Errors::OperationFailure
  false
end

# Reads a user with strong consistency and counts the gears found under all
# of the user's domains, applications and group instances.
#
# user_id - id of the CloudUser document to inspect
#
# Returns a two-element array: the consumed_gears value stored on the user
# and the number of gears actually counted. When the user does not exist an
# error line is printed and [0, 0] is returned.
def check_consumed_gears(user_id)
  user = CloudUser.with(consistency: :strong).find_by(:_id => user_id)
  gear_total = 0
  user.domains.each do |domain|
    domain.applications.each do |application|
      gear_total = application.group_instances.inject(gear_total) do |running, group|
        running + group.gears.length
      end
    end
  end
  [user.consumed_gears, gear_total]
rescue Mongoid::Errors::DocumentNotFound
  puts "Error: User with ID #{user_id} not found in mongo"
  [0, 0]
end

# Rewrites a user's stored consumed_gears value with the number of gears
# actually counted, while holding the user lock.
#
# user_id - id of the CloudUser document to correct
#
# Returns false when the user lock could not be taken (nothing is changed),
# true otherwise. The lock is released even when the check or update raises.
def reset_consumed_gears(user_id)
  return false unless lock_user_without_app(user_id)

  begin
    # re-read both numbers under the lock; write only when they still differ
    recorded, counted = check_consumed_gears(user_id)
    CloudUser.where(:_id => user_id).set(:consumed_gears, counted) unless recorded == counted
    true
  ensure
    unlock_user_without_app(user_id)
  end
end

# Tells whether any application has a gear with the given UID placed on one
# of the given nodes.
#
# gear_uid - UID value to look for
# si_list  - list of server identities the gear may live on
#
# Returns the result of the exists? query on Application.
def datastore_has_gear_uid?(gear_uid, si_list)
  gear_match = { "uid" => gear_uid, "server_identity" => {"$in" => si_list} }
  criteria = { "group_instances.gears" => {"$elemMatch" => gear_match} }
  Application.where(criteria).exists?
end

# Tells whether the given UID is currently listed in the available_uids of
# the district with the given uuid.
#
# district_uuid - uuid of the district to query
# gear_uid      - UID value to look for
#
# Returns the result of the exists? query on District.
def district_has_available_uid?(district_uuid, gear_uid)
  criteria = { "uuid" => district_uuid, "available_uids" => gear_uid }
  District.where(criteria).exists?
end

# Builds the in-memory summary kept for one user document.
#
# user - hash-like user document; reads "login", "consumed_gears" and the
#        optional "ssh_keys" list (entries with "name" and "content")
#
# Returns a hash with the login, the stored consumed_gears value, an empty
# "domains" hash and "ssh_keys" mapping the MD5 hex digest of each key's
# content to the key's name (a later key with the same content wins).
def get_user_info(user)
  keys_by_digest = (user["ssh_keys"] || []).each_with_object({}) do |key, by_digest|
    by_digest[Digest::MD5.hexdigest(key["content"])] = key["name"]
  end

  {
    "login" => user["login"],
    "consumed_gears" => user["consumed_gears"],
    "domains" => {},
    "ssh_keys" => keys_by_digest
  }
end


# Overall outcome of the checks and the findings printed at the end
no_error = true
summary = []

# Lookup tables filled while scanning mongo
user_hash = {}        # user id => hash built by get_user_info
domain_hash = {}      # domain id => owner (user) id
district_hash = {}    # district uuid => [name, max_capacity, server identities, available_uids]
datastore_hash = {}   # gear uuid => [login, creation time, server identity, app uuid, ssh keys]
gear_uid_hash = {}    # server identity => list of gear UIDs on that node

# ssh keys reported by the nodes; replaced when the ssh key check runs
gear_sshkey_hash = {}

# Items recorded by the checks so the fix phase can work on them later
error_ssh_keys_app_uuids = []
error_consumed_gears_user_ids = []
error_unreserved_district_uid_map = {}
error_unused_district_uid_map = {}

puts "Started at: #{Time.now}"
# wall-clock start in milliseconds, used for the timing lines printed later
start_time = (Time.now.to_f * 1000).to_i

# Only applications that have at least one gear are scanned
app_query = {"group_instances.gears.0" => {"$exists" => true}}
app_selection = {
  :fields => %w[
    name
    uuid
    created_at
    domain_id
    group_instances.gears.uuid
    group_instances.gears.uid
    group_instances.gears.server_identity
    group_instances._id
    app_ssh_keys.name
    app_ssh_keys.content
  ],
  :timeout => false
}
ret = []

user_selection = {
  :fields => %w[login ssh_keys.name ssh_keys.content consumed_gears],
  :timeout => false
}
# Same selection with its own copy of the field list, plus :read => :primary;
# used when a single user has to be looked up again later
user_selection_primary = user_selection.merge(:fields => user_selection[:fields].dup,
                                              :read => :primary)

# Load every user into user_hash, keyed by the string form of its _id
OpenShift::DataStore.find(:cloud_users, {}, user_selection) do |user_doc|
  user_hash[user_doc["_id"].to_s] = get_user_info(user_doc)
end

# Walk all domains: remember each domain's owner, add the domain's system ssh
# keys to the owner's key set and start the owner's per-domain gear counter.
domain_selection = {
  :fields => %w[owner_id system_ssh_keys.name system_ssh_keys.content],
  :timeout => false
}
OpenShift::DataStore.find(:domains, {}, domain_selection) do |domain|
  domain_id = domain["_id"].to_s
  owner_id = domain["owner_id"].to_s
  domain_hash[domain_id] = owner_id

  # digest of key content => key name for the domain's system keys
  system_ssh_keys = {}
  (domain["system_ssh_keys"] || []).each do |key|
    system_ssh_keys[Digest::MD5.hexdigest(key["content"])] = key["name"]
  end

  # The owner was not among the users loaded earlier: look it up once more
  # using the selection that carries :read => :primary
  unless user_hash[owner_id]
    OpenShift::DataStore.find(:cloud_users, {"_id" => BSON::ObjectId(owner_id)}, user_selection_primary) do |user_doc|
      user_hash[user_doc["_id"].to_s] = get_user_info(user_doc)
    end
  end

  owner = user_hash[owner_id]
  if owner
    owner["ssh_keys"].merge! system_ssh_keys
    owner["domains"][domain_id] = 0
  end
end

# District data is only needed for the UID check, and only when districts
# are enabled in the broker configuration
if fix_district_uids && Rails.configuration.msg_broker[:districts][:enabled]
  OpenShift::DataStore.find(:districts, {}, {:timeout => false}) do |district|
    # names of the district's nodes; entries without a name are dropped
    si_list = district["server_identities"].map { |identity| identity["name"] }.compact
    district_hash[district["uuid"]] = [
      district["name"],
      district["max_capacity"],
      si_list,
      district["available_uids"]
    ]
  end
end

# Walk all applications that have gears: record every gear in datastore_hash,
# collect the gear UIDs per node when the district check is requested, and
# add the application's gear count to its owner's per-domain counter.
# Applications whose domain or owner is unknown are skipped.
OpenShift::DataStore.find(:applications, app_query, app_selection) do |app|
  creation_time = app['created_at']
  domain_id = app['domain_id'].to_s
  owner_id = domain_hash[domain_id]

  # digest of key content => key name for the application's own keys
  app_ssh_keys = {}
  (app['app_ssh_keys'] || []).each do |key|
    app_ssh_keys[Digest::MD5.hexdigest(key["content"])] = key["name"]
  end

  unless owner_id.nil? or user_hash[owner_id].nil?
    owner = user_hash[owner_id]
    login = owner["login"]
    # the owner's keys (user + domain system keys) apply to every gear too
    app_ssh_keys.merge! owner["ssh_keys"]

    gear_count = 0
    app['group_instances'].each do |group|
      group['gears'].each do |gear|
        gear_count += 1
        node = gear['server_identity']
        datastore_hash[gear['uuid'].to_s] = [login, creation_time, node, app["uuid"], app_ssh_keys]

        if fix_district_uids and Rails.configuration.msg_broker[:districts][:enabled]
          # record all used uid values for each node to match later with the district
          (gear_uid_hash[node] ||= []) << gear['uid'].to_i
        end
      end
    end
    owner["domains"][domain_id] += gear_count
  end
end

total_time = (Time.now.to_f * 1000).to_i - start_time
puts "Time to fetch mongo data: #{total_time.to_f/1000}s"
puts "Total gears found in mongo: #{datastore_hash.length}"


if fix_consumed_gears
  # Check consumed gears vs actual gears
  user_hash.each do |owner_id, owner_hash|
    total_gears = owner_hash["domains"].values.inject(0) { |running, count| running + count }

    print "Checking consumed gear count for user #{owner_hash['login']}...\t" if verbose

    # The numbers gathered during the scan may be stale, so a suspected
    # mismatch is confirmed with a fresh read before it is reported
    mismatch = false
    if owner_hash['consumed_gears'] != total_gears
      user_consumed_gears, app_actual_gears = check_consumed_gears(owner_id)
      mismatch = (user_consumed_gears != app_actual_gears)
    end

    if mismatch
      puts "FAIL" if verbose
      summary << "User #{owner_hash['login']} has a mismatch in consumed gears (#{user_consumed_gears}) and actual gears (#{app_actual_gears})"
      no_error = false

      # record the user id for fixing later
      error_consumed_gears_user_ids << owner_id
    elsif verbose
      puts "OK"
    end
  end
end

if fix_ssh_keys
  # Ask the nodes for the ssh keys of all gears and time the call
  get_all_sshkeys_start_time = (Time.now.to_f * 1000).to_i
  gear_sshkey_hash, sshkeys_sender_list = OpenShift::ApplicationContainerProxy.get_all_gears_sshkeys
  total_time = (Time.now.to_f * 1000).to_i - get_all_sshkeys_start_time
  puts "Time to get all sshkeys for all gears from nodes: #{total_time.to_f/1000}s"
  puts "Total gears found on the nodes: #{gear_sshkey_hash.length}"
  puts "Total nodes that responded : #{sshkeys_sender_list.length}"

  # store the current time for comparisons
  current_time = Time.now

  # now check
  puts "Checking application gears and ssh keys on corresponding nodes:" if verbose
  datastore_hash.each do |gear_uuid, gear_info|
    _login, creation_time, _server_identity, app_uuid, db_sshkeys = gear_info

    print "Checking ssh keys for gear: #{gear_uuid}...\t" if verbose

    # Applications created within the last 600 seconds and gears that no
    # node reported are not compared; they count as OK
    comparable = (current_time - creation_time) > 600 && gear_sshkey_hash.has_key?(gear_uuid)
    unless comparable
      puts "OK" if verbose
      next
    end

    # both sides are compared by key digest
    node_sshkeys = gear_sshkey_hash[gear_uuid]
    gear_sshkeys_list = node_sshkeys.keys.uniq.sort
    db_sshkeys_list = db_sshkeys.keys.uniq.sort

    if db_sshkeys_list == gear_sshkeys_list
      puts "OK" if verbose
      next
    end

    no_error = false
    puts "FAIL" if verbose

    # keys present on the node but missing in mongo
    (gear_sshkeys_list - db_sshkeys_list).each do |digest|
      summary << "Gear '#{gear_uuid}' has  key with comment '#{node_sshkeys[digest]}' on the node but not in mongo."
    end

    # keys present in mongo but missing on the node
    (db_sshkeys_list - gear_sshkeys_list).each do |digest|
      summary << "Gear '#{gear_uuid}' has key with name '#{db_sshkeys[digest]}' in mongo but not on the node."
    end

    # record the app uuid for fixing later
    error_ssh_keys_app_uuids << app_uuid
  end
end

if fix_district_uids and Rails.configuration.msg_broker[:districts][:enabled]
  # check for any unreserved uid in the district
  # these are uids that gears are using but are still present in the district's available_uids
  puts "Checking for unreserved UIDs in the district:" if verbose
  gear_uid_hash.each do |server_identity, uid_list|
    # only the first district that lists this node is considered
    district_uuid, district_info = district_hash.find { |_uuid, info| info[2].include?(server_identity) }
    next if district_info.nil?

    unreserved_uids = uid_list & district_info[3]
    unreserved_uids.each do |unreserved_uid|
      # re-checking unreserved UID in the database
      print "Re-checking UID #{unreserved_uid} in district #{district_info[0]} in the database...\t" if verbose
      if not datastore_has_gear_uid?(unreserved_uid, [server_identity])
        # the UID is no longer being used by any gear
        puts "OK" if verbose
      elsif not district_has_available_uid?(district_uuid, unreserved_uid)
        # the UID is no longer listed as available in the district
        puts "OK" if verbose
      else
        no_error = false
        puts "FAIL" if verbose
        summary << "UID '#{unreserved_uid}' is available in district '#{district_info[0]}' but used by a gear on node '#{server_identity}'"

        # record the UID for fixing later
        error_unreserved_district_uid_map[district_uuid] = [] unless error_unreserved_district_uid_map.has_key? district_uuid
        error_unreserved_district_uid_map[district_uuid] << unreserved_uid
      end
    end
  end

  # check for any unused uid in the district
  # these are uids that are reserved in the district, but no gear is using
  puts "Checking for unused UIDs in the district:" if verbose
  first_uid = Rails.configuration.msg_broker[:districts][:first_uid]
  district_hash.each do |district_uuid, district_info|
    district_name, max_capacity, si_list, available_uids = district_info

    # collect gear uids from all nodes with server identities within this district.
    # The list starts empty for every district: every district's UID range
    # begins at first_uid, so carrying over UIDs used in a previously visited
    # district would hide UIDs that are unused in this one.
    district_used_uids = si_list.inject([]) do |used, server_identity|
      used | (gear_uid_hash[server_identity] || [])
    end

    # every UID of the district: first_uid, first_uid + 1, ... (max_capacity entries)
    district_all_uids = Array.new(max_capacity.to_i) { |offset| first_uid + offset }
    # whatever is neither available nor used by a gear is reserved without an owner
    district_unused_uids = district_all_uids - available_uids - district_used_uids

    district_unused_uids.each do |unused_uid|
      # re-checking unused UID in the database
      print "Re-checking UID #{unused_uid} in district #{district_name} in the database...\t" if verbose
      if datastore_has_gear_uid?(unused_uid, si_list)
        # found a gear that uses this UID
        puts "OK" if verbose
      elsif district_has_available_uid?(district_uuid, unused_uid)
        # the UID is no longer reserved in the district
        puts "OK" if verbose
      else
        no_error = false
        puts "FAIL" if verbose
        summary << "UID '#{unused_uid}' is reserved in district '#{district_name}' but not used by any gear"

        # record the UID for fixing later
        error_unused_district_uid_map[district_uuid] = [] unless error_unused_district_uid_map.has_key? district_uuid
        error_unused_district_uid_map[district_uuid] << unused_uid
      end
    end
  end
end


# An application or user may have been recorded more than once
error_ssh_keys_app_uuids.uniq!
error_consumed_gears_user_ids.uniq!

# Overall verdict, followed by the individual findings when anything failed
if no_error
  puts "Success"
else
  puts "Check failed.\n#{summary.join("\n")}"
  puts ""
end

unless error_ssh_keys_app_uuids.empty?
  puts "Total #{error_ssh_keys_app_uuids.length} applications have ssh key mismatches."
end
unless error_consumed_gears_user_ids.empty?
  puts "Total #{error_consumed_gears_user_ids.length} users have consumed gear mismatches."
end

unless error_unreserved_district_uid_map.empty?
  total_unreserved_uids = error_unreserved_district_uid_map.values.inject(0) { |running, uids| running + uids.length }
  total_affected_districts = error_unreserved_district_uid_map.length
  puts "Total #{total_unreserved_uids} unreserved UIDs across #{total_affected_districts} districts."
end

unless error_unused_district_uid_map.empty?
  total_unused_uids = error_unused_district_uid_map.values.inject(0) { |running, uids| running + uids.length }
  total_affected_districts = error_unused_district_uid_map.length
  puts "Total #{total_unused_uids} unused UIDs across #{total_affected_districts} districts."
end
puts "" unless no_error

# check if we need to fix the ssh key mismatches
# Runs only when --ssh-keys was given, --report-only was not, and the check
# recorded at least one application. Each application is handled on its own:
# a failure is reported and counted, and the loop continues with the next one.
fixed_app_count = 0
failed_app_count = 0
unless fix_ssh_keys.nil? or report_only or error_ssh_keys_app_uuids.length == 0
  puts "Fixing ssh key inconsistencies for all affected applications:"
  error_ssh_keys_app_uuids.each do |uuid|
    begin
      print "Fixing ssh keys for application: #{uuid}...\t" if verbose
      app = Application.find_by(:uuid => uuid)
      app.fix_gear_ssh_keys()
      puts "OK" if verbose
      fixed_app_count += 1
    rescue Mongoid::Errors::DocumentNotFound
      # the application is gone, so there is nothing left to fix
      puts "OK" if verbose
      puts "Application '#{uuid}' not found in the database"
      fixed_app_count += 1
    rescue StandardError => ex
      # StandardError rather than Exception: an interrupt (Ctrl-C) or an exit
      # request must stop the script instead of being counted as a failed fix
      puts "FAIL" if verbose
      puts "Failed to fix ssh key mismatches for application '#{uuid}': #{ex.message}"
      failed_app_count += 1
    end
  end
end


# check if we need to fix the user's consumed gears mismatch
# Runs only when --consumed-gears was given, --report-only was not, and the
# check recorded at least one user. Each user is handled on its own: a
# failure is reported and counted, and the loop continues with the next one.
fixed_user_count = 0
failed_user_count = 0
unless fix_consumed_gears.nil? or report_only or error_consumed_gears_user_ids.length == 0
  puts "Fixing consumed gears count for all affected users:"
  error_consumed_gears_user_ids.each do |user_id|
    begin
      print "Fixing consumed gears for user: #{user_id}...\t" if verbose
      success = reset_consumed_gears(user_id)
      if success
        puts "OK" if verbose
        fixed_user_count += 1
      else
        # reset_consumed_gears returns false when it could not take the user lock
        puts "FAIL" if verbose
        puts "Skipped fixing consumed gears mismatches for user '#{user_id}'"
        failed_user_count += 1
      end
    rescue Mongoid::Errors::DocumentNotFound
      # the user is gone, so there is nothing left to fix
      puts "OK" if verbose
      puts "User '#{user_id}' not found in the database"
      fixed_user_count += 1
    rescue StandardError => ex
      # StandardError rather than Exception: an interrupt (Ctrl-C) or an exit
      # request must stop the script instead of being counted as a failed fix
      puts "FAIL" if verbose
      puts "Failed to fix consumed gears mismatches for user '#{user_id}': #{ex.message}"
      failed_user_count += 1
    end
  end
end


# check if we need to fix unreserved district UIDs
# Runs only when --district-uids was given, --report-only was not, and the
# check recorded at least one unreserved UID. For every recorded UID the
# application owning the gear is looked up and, inside the application lock,
# the UID is reserved in the district if the gear is still there.
fixed_unreserved_uid_count = 0
failed_unreserved_uid_count = 0
unless fix_district_uids.nil? or report_only or error_unreserved_district_uid_map.empty?
  puts "Fixing unreserved district UIDs for all affected districts:"
  error_unreserved_district_uid_map.each do |district_uuid, uids|
    district_info = district_hash[district_uuid]
    si_list = district_info[2]
    uids.each do |gear_uid|
      print "Fixing unreserved UID: #{gear_uid} in district: #{district_info[0]}...\t" if verbose
      query = {"group_instances.gears" => {"$elemMatch" => { "uid" => gear_uid, "server_identity" => {"$in" => si_list}}}}
      begin
        app = Application.find_by(query)
        Application.run_in_application_lock(app) do
          # check if this app still has the gear UID
          verify_query = {"_id" => app._id}.merge(query)
          if Application.where(verify_query).count > 0
            reserved_uid = District::reserve_uid(district_uuid, gear_uid)
            if reserved_uid.nil?
              puts "FAIL" if verbose
              puts "Failed to reserve UID #{gear_uid} within district #{district_info[0]}"
              failed_unreserved_uid_count += 1
            else
              puts "OK" if verbose
              fixed_unreserved_uid_count += 1
            end
          else
            puts "OK" if verbose
            puts "Gear with UID #{gear_uid} not found within application with Id #{app._id} in mongo"
            fixed_unreserved_uid_count += 1
          end
        end
      rescue Mongoid::Errors::DocumentNotFound
        # no application uses the UID any more, so nothing has to be reserved
        puts "OK" if verbose
        puts "Gear with UID #{gear_uid} not found within district #{district_info[0]} in mongo"
        fixed_unreserved_uid_count += 1
      rescue StandardError => ex
        # Same policy as the other fix loops: report the failure, count it
        # and carry on with the next UID instead of aborting the whole run
        puts "FAIL" if verbose
        puts "Failed to reserve UID #{gear_uid} within district #{district_info[0]}: #{ex.message}"
        failed_unreserved_uid_count += 1
      end
    end
  end
end


# check if we need to fix unused district UIDs
# Runs only when --district-uids was given, --report-only was not, and the
# check recorded at least one unused UID.
fixed_unused_uid_count = 0
if !fix_district_uids.nil? && !report_only && !error_unused_district_uid_map.empty?
  puts "Fixing unused district UIDs for all affected districts:"
  error_unused_district_uid_map.each do |district_uuid, uids|
    district_info = district_hash[district_uuid]
    uids.each do |gear_uid|
      print "Fixing unused UID: #{gear_uid} in district: #{district_info[0]}...\t" if verbose
      if datastore_has_gear_uid?(gear_uid, district_info[2])
        # a gear took the UID since the check ran; leave the reservation alone
        puts "OK" if verbose
        puts "Gear with UID #{gear_uid} was found within district #{district_info[0]} in mongo"
      else
        District::unreserve_uid(district_uuid, gear_uid)
        puts "OK" if verbose
      end
      # both outcomes count as fixed
      fixed_unused_uid_count += 1
    end
  end
end

# Report what the fix phase did; a line is printed only for non-zero counters
puts "" unless no_error or report_only
puts "Fixed ssh key mismatches for #{fixed_app_count} applications." if fixed_app_count > 0
puts "Failed to fix ssh key mismatches for #{failed_app_count} applications." if failed_app_count > 0

puts "Fixed consumed gears mismatches for #{fixed_user_count} user." if fixed_user_count > 0
# failed_user_count counts users, so the message names users as well
puts "Failed to fix consumed gears mismatches for #{failed_user_count} users." if failed_user_count > 0

puts "Fixed #{fixed_unreserved_uid_count} unreserved UIDs across all districts." if fixed_unreserved_uid_count > 0
puts "Failed to fix #{failed_unreserved_uid_count} unreserved UIDs across all districts." if failed_unreserved_uid_count > 0

puts "Fixed #{fixed_unused_uid_count} unused UIDs across all districts." if fixed_unused_uid_count > 0
puts "" unless no_error or report_only

total_time = (Time.now.to_f * 1000).to_i - start_time
puts "Total time: #{total_time.to_f/1000}s"
puts "Finished at: #{Time.now}"
# Exit status 1 whenever the checks found a mismatch, whether or not it was
# fixed afterwards; 0 only for a clean check
exit(no_error ? 0 : 1)
