#!/usr/bin/python
__requires__ = 'TurboGears[future]'
import pkg_resources
pkg_resources.require("TurboGears")

from sqlobject import *
import sys, os, string, stat
import turbogears
from mirrormanager.model import *
from mirrormanager.repomap import *
from mirrormanager.lib import manage_pidfile, remove_pidfile
import re
import glob
import yum.repoMDObject
import datetime, time
import hashlib
from turbogears import config
from subprocess import Popen, PIPE

# Filesystem root that Directory names are joined onto (os.path.join(rootdir, ...))
# when this script reads checksum files and repomd.xml from disk.
rootdir='/'
# Pidfile path handed to manage_pidfile()/remove_pidfile() in main().
pidfile='/var/run/mirrormanager/umdl.pid'

from turbogears.database import PackageHub
# Database hub for the "mirrormanager" package.
hub = PackageHub("mirrormanager")
# NOTE(review): presumably the module-level default connection that SQLObject
# model classes pick up -- confirm against the SQLObject documentation.
__connection__ = hub

def trim_os_from_dirname(dirname):
    """Return dirname cut just before its last '/os'.

    The name is returned unchanged when it contains no '/os', or when the
    only '/os' sits at the very start of the string.
    """
    cut = dirname.rfind('/os')
    return dirname[:cut] if cut > 0 else dirname

def rename_SRPMS_source(l):
    """Return a new list built from l with 'source' entries dropped and
    every 'SRPMS' entry replaced by 'source'.  Other entries are kept
    as-is and in order."""
    return ['source' if entry == 'SRPMS' else entry
            for entry in l
            if entry != 'source']


def _get_version_from_path(path):
    """Return the first version-looking component of path, or None.

    A component qualifies when it is enclosed by '/' on both sides and
    consists of digits and dots, optionally followed by '-' and word
    characters (e.g. '9', '10.1-beta').
    """
    pattern = re.compile(r'/(([\.\d]+)(\-\w+)?)/')
    found = pattern.search(path)
    if found is None:
        return None
    return found.group(1)

def create_version_from_path(category, path):
    """Create a Version for category.product named after the version
    component found in path.

    Returns the new Version, or None when no version component can be
    extracted from path.  The Version is flagged isTest when path
    contains '/test/'.
    """
    vname = _get_version_from_path(path)
    if vname is None or vname == '':
        return None
    return Version(product=category.product, name=vname,
                   isTest=('/test/' in path))

def guess_ver_arch_from_path(category, path):
    """Guess the (Version, Arch) a repository path belongs to.

    The arch is 'source' when path contains 'SRPMS'; otherwise it is the
    first Arch whose name appears as a whole path component.  The version
    is the newest (highest id) Version of category.product whose name
    appears as a whole path component; if there is none, one is created
    from the path when a version number can be extracted.

    Returns a (ver, arch) tuple; either element may be None.
    """
    def _is_component(name):
        # Names are literal text: escape them so that e.g. the '.' in
        # '9.newkey' cannot match an arbitrary character.
        s = '.*(^|/)%s(/|$).*' % re.escape(name)
        return re.compile(s).match(path) is not None

    arch = None
    if 'SRPMS' in path:
        arch = Arch.byName('source')
    else:
        for a in Arch.select():
            if _is_component(a.name):
                arch = a
                break

    ver = None
    # newest versions/IDs first, which also lets the Fedora "9.newkey"
    # version win over plain "9" when both could apply.
    for v in Version.select(orderBy='-id'):
        if v.product != category.product:
            continue
        if _is_component(v.name):
            ver = v
            break

    # create Versions if we can figure it out...
    if ver is None:
        ver = create_version_from_path(category, path)

    return (ver, arch)


# Something like this is committed to yum upstream, but may not be in the copy we are using.
def set_repomd_timestamp(yumrepo):
    """Store the newest per-file-type timestamp on yumrepo.timestamp.

    Looks at yumrepo.repoData[ft].timestamp for every ft returned by
    yumrepo.fileTypes(), converts each to int, and returns the largest
    value (0 when there are no file types).
    """
    stamps = [int(yumrepo.repoData[ft].timestamp) for ft in yumrepo.fileTypes()]
    newest = max([0] + stamps)
    yumrepo.timestamp = newest
    return newest

def make_file_details_from_checksums(dir):
    """Record a FileDetail for every file named in dir's checksum files.

    Checksum files are located under os.path.join(rootdir, dir.name) by
    glob (MD5SUM/*.md5sum, SHA1SUM/*.sha1sum, *-CHECKSUM for sha256,
    SHA512SUM/*.sha512sum).  Each listed file that exists on disk gets a
    FileDetail row holding whichever checksums were found plus its size
    and ctime; a row is only created when no identical one exists yet.

    dir is a Directory object (its .name is read).  Returns None.
    """
    def _parse_checksum_file(path, checksumlen):
        """Return {filename: checksum} for the lines of path whose first
        field is exactly checksumlen characters long.  Unreadable files
        yield whatever was parsed before the error (possibly nothing)."""
        r = {}
        try:
            f = open(path, 'r')
        except (IOError, OSError):
            return r
        try:
            try:
                for line in f:
                    s = line.split()
                    if len(s) < 2 or len(s[0]) != checksumlen:
                        continue
                    # strip off the extraneous leading '*' (binary-mode
                    # marker) from the name, but leave the rest intact
                    r[s[1].lstrip('*')] = s[0]
            except (IOError, OSError):
                # best effort: keep what was read so far
                pass
        finally:
            f.close()
        return r

    def _checksums_from_globs(dirname, globs, checksumlen):
        """Merge the parsed contents of every checksum file matching globs."""
        d = {}
        for g in globs:
            for checksum_file in glob.glob(os.path.join(rootdir, dirname, g)):
                d.update(_parse_checksum_file(checksum_file, checksumlen))
        return d

    sha1_globs = ['*.sha1sum', 'SHA1SUM']
    md5_globs = ['*.md5sum', 'MD5SUM']
    sha256_globs = ['*-CHECKSUM']
    sha512_globs = ['*.sha512sum', 'SHA512SUM']
    # the length argument is the number of hex digits of each digest type
    md5dict = _checksums_from_globs(dir.name, md5_globs, 32)
    sha1dict = _checksums_from_globs(dir.name, sha1_globs, 40)
    sha256dict = _checksums_from_globs(dir.name, sha256_globs, 64)
    sha512dict = _checksums_from_globs(dir.name, sha512_globs, 128)

    files = set(md5dict) | set(sha1dict) | set(sha256dict) | set(sha512dict)

    for f in files:
        try:
            s = os.stat(os.path.join(rootdir, dir.name, f))
        except OSError:
            # skip files that are listed but don't actually exist
            continue
        details = dict(directory=dir, filename=f,
                       sha1=sha1dict.get(f), md5=md5dict.get(f),
                       sha256=sha256dict.get(f), sha512=sha512dict.get(f),
                       size=s.st_size, timestamp=s[stat.ST_CTIME])
        try:
            # indexing runs the query; IndexError means "no such row yet"
            FileDetail.selectBy(**details)[0]
        except IndexError:
            FileDetail(**details)

def make_repomd_file_details(dir):
    """Record a FileDetail for repodata/repomd.xml of a repository.

    Does nothing unless dir.name ends with '/repodata' and
    os.path.join(rootdir, dir.name, 'repomd.xml') exists and is readable.
    The FileDetail holds the file's size, md5/sha1/sha256/sha512 digests
    and the repomd timestamp; it is only created when no identical row
    exists yet.

    dir is a Directory object.  Returns None.
    """
    if not dir.name.endswith('/repodata'):
        return
    repomd_fname = os.path.join(rootdir, dir.name, 'repomd.xml')
    if not os.path.exists(repomd_fname):
        return
    try:
        # binary mode: the size and digests must describe the raw bytes
        f = open(repomd_fname, 'rb')
        try:
            repomd = f.read()
        finally:
            f.close()
    except (IOError, OSError):
        return
    size = len(repomd)
    md5 = hashlib.md5(repomd).hexdigest()
    sha1 = hashlib.sha1(repomd).hexdigest()
    sha256 = hashlib.sha256(repomd).hexdigest()
    sha512 = hashlib.sha512(repomd).hexdigest()

    yumrepo = yum.repoMDObject.RepoMD('repoid', repomd_fname)
    # older yum copies don't compute an overall timestamp themselves
    if 'timestamp' not in yumrepo.__dict__:
        set_repomd_timestamp(yumrepo)
    timestamp = yumrepo.timestamp
    details = dict(directory=dir, filename='repomd.xml',
                   sha1=sha1, md5=md5, sha256=sha256, sha512=sha512,
                   timestamp=timestamp, size=size)
    try:
        # indexing runs the query; IndexError means "no such row yet"
        FileDetail.selectBy(**details)[0]
    except IndexError:
        FileDetail(**details)


def move_repository_from_development(prefix, arch, newdir):
    """Re-point the Repository matching (prefix, arch) at newdir when it
    is moving from a '/development' directory to a '/releases' one.

    Returns the Repository (moved or not), or None when the lookup fails
    for any reason.
    """
    try:
        matches = Repository.selectBy(prefix=prefix, arch=arch)
        repo = matches[0]
    except:
        return None

    if repo.directory is None or newdir.name is None:
        return repo
    leaving_development = '/development' in repo.directory.name
    entering_releases = '/releases' in newdir.name
    if leaving_development and entering_releases:
        repo.directory = newdir
    return repo

def make_repository(dir, category):
    """Create a Repository for dir within category.

    The version and arch are guessed from dir.name relative to the
    category's top directory.  Returns the new Repository, or None when
    version/arch cannot be determined or the Repository cannot be
    created (for instance because it already exists).
    """
    path = dir.name[len(category.topdir.name)+1:]
    (ver, arch) = guess_ver_arch_from_path(category, path)
    if ver is None or arch is None:
        return None

    prefix = repo_prefix(path, category, ver)
    try:
        # Historically, Repository.name was a longer string with
        # product and category delineations.  But we were getting
        # unique constraint conflicts once we started introducing
        # repositories under repositories.  And .name isn't used for
        # anything meaningful.  So simply have it match dir.name,
        # which can't conflict.
        return Repository(name=dir.name, category=category, version=ver,
                          arch=arch, directory=dir, prefix=prefix)
    except:
        # The repo exists.  It may be one we would want to move out of
        # development (see move_repository_from_development), but that
        # step is currently disabled.
        return None

    


def nuke_gone_directories(category, category_directories):
    """Destroy every Directory that belongs only to category and is not
    a key of category_directories.

    Deleting a Directory has a ripple effect through the whole
    database.  Be really sure you're ready to do this.  It comes
    in handy when, say, a Test release is dropped.
    """
    for d in Directory.select():
        # only touch directories owned by this category alone
        if len(d.categories) == 1 and category in d.categories:
            if d.name not in category_directories:
                d.destroySelf()

# Names of directories found to be unreadable, each mapped to True.  Filled
# in by make_one_directory() and sync_directories_from_directory(), and
# consulted by make_one_directory() so that children of an unreadable
# directory are marked unreadable too.
unreadable_dirs = {}

def parent_dir(path):
    """Return everything before the last '/' in path ('' if there is no '/')."""
    head, _sep, _tail = path.rpartition('/')
    return head


def ctime_from_rsync(date, hms):
    """Convert an rsync listing date ('YYYY/MM/DD') and time ('HH:MM:SS')
    into integer seconds since the epoch, interpreted as local time.

    Raises ValueError when either string is malformed.
    """
    year, month, day = date.split('/')
    hour, minute, second = hms.split(':')
    fields = [int(v) for v in (year, month, day, hour, minute, second)]
    moment = datetime.datetime(*fields)
    return int(time.mktime(moment.timetuple()))


def make_one_directory(line, category, path, category_directories):
    """Register the directory described by one rsync listing line.

    line must have exactly five whitespace-separated fields: permissions,
    size, date, time and the directory path relative to the listing root.
    An entry for the directory is stored in category_directories under
    "<path>/<relative path>" (or just path for '.').  A directory is
    marked unreadable, and remembered in unreadable_dirs, when "other"
    lacks read or execute permission or when its parent is unreadable.
    When the directory is a 'repodata' directory its parent is flagged as
    a repository.

    category is accepted for interface compatibility and not used.

    Returns (dname, category_directories).
    Raises ValueError if the line does not have five fields or the
    date/time cannot be parsed.
    """
    perms, size, date, hms, d = line.split()
    if d == '.':
        dname = path
    else:
        dname = "%s/%s" % (path, d)

    readable = True
    # unreadable_dirs is only mutated, never rebound, so no 'global' needed
    if not re.match(r'^d......r.x', perms) or parent_dir(dname) in unreadable_dirs:
        readable = False
        unreadable_dirs[dname] = True

    ctime = ctime_from_rsync(date, hms)
    category_directories[dname] = {'files':{}, 'isRepository':False, 'readable':readable, 'ctime':ctime, 'changed':True}

    # match the last path component exactly; a plain endswith('repodata')
    # would also hit names like 'myrepodata' and slice a bogus parent
    if d == 'repodata' or d.endswith('/repodata'):
        parent_dname = dname[:-len('/repodata')]
        parent = category_directories.get(parent_dname)
        if parent is not None:
            parent['isRepository'] = True
        else:
            category_directories[parent_dname] = {'files':{}, 'isRepository':True, 'readable':readable, 'ctime':ctime, 'changed':True}

    return dname, category_directories

def add_file_to_directory(line, path, category_directories):
    """Record the file described by one rsync listing line in the 'files'
    dict of its directory's entry in category_directories.

    Lines that do not have exactly five fields, or whose date/time cannot
    be parsed, are silently ignored.  The directory entry must already
    exist.  Returns None.
    """
    fields = line.split()
    if len(fields) != 5:
        return
    perm, size, date, hms, filepath = fields
    try:
        when = ctime_from_rsync(date, hms)
    except ValueError:
        return

    subdir, slash, filename = filepath.rpartition('/')
    if slash:
        dirpath = "%s/%s" % (path, subdir)
    else:
        dirpath = path
    entry = {'size':size, 'stat':when}
    category_directories[dirpath]['files'][filename] = entry

def short_filelist(files):
    """Trim a directory's file dict down to its 10 newest entries when it
    holds more than 10 .html, .rpm, .hdr or .drpm files.

    files maps filename -> {'size': ..., 'stat': ...}.  "Newest" means
    largest 'stat', ties broken by filename.  When no suffix exceeds the
    threshold, the very same dict object is returned; otherwise a new
    dict sharing the original value objects is returned.
    """
    suffixes = ('.html', '.rpm', '.hdr', '.drpm')
    busiest = max([len([name for name in files.keys() if name.endswith(sfx)])
                   for sfx in suffixes])
    if busiest <= 10:
        return files

    by_age = sorted([(files[name]['stat'], name, files[name]['size'])
                     for name in files.keys()])
    return dict([(name, files[name]) for (_when, name, _size) in by_age[-10:]])

def sync_category_directories(category, category_directories):
    """Bring the database in line with a scanned directory tree.

    category_directories maps directory name -> dict with the keys
    'files', 'isRepository', 'readable', 'ctime' and 'changed'.  For each
    directory (except those whose name contains '.snapshot' or '.~tmp~')
    the Directory row is created or updated, its file list refreshed when
    'changed' is set, and checksum-based FileDetails recorded.  A second
    pass then creates Repository rows and repomd.xml FileDetails, and
    finally ageFileDetails() is called.
    """
    excludes = ['.snapshot', '.~tmp~']

    def _excluded(dirpath):
        """True when dirpath contains one of the excluded substrings."""
        for e in excludes:
            if e in dirpath:
                return True
        return False

    for dirpath, value in category_directories.iteritems():
        if _excluded(dirpath):
            continue
        try:
            directory = Directory.byName(dirpath)
        except SQLObjectNotFound:
            directory = Directory(name=dirpath, readable=value['readable'], ctime=value['ctime'])
            directory.addCategory(category)
        else:
            # only write attributes that actually differ
            if directory.readable != value['readable']:
                directory.readable = value['readable']
            if directory.ctime != value['ctime']:
                directory.ctime = value['ctime']
        if value['changed']:
            shortlist = short_filelist(value['files'])
            if directory.files != shortlist:
                directory.files = shortlist
        make_file_details_from_checksums(directory)

    # this has to be a second pass to be sure the child repodata/ dir is created in the db first
    for dirpath, value in category_directories.iteritems():
        # excluded paths were never created above, so looking them up
        # here would raise SQLObjectNotFound
        if _excluded(dirpath):
            continue
        directory = Directory.byName(dirpath)
        if value['isRepository']:
            make_repository(directory, category)
        make_repomd_file_details(directory)
    ageFileDetails()

def parse_rsync_listing(cname, f):
    """Parse an rsync recursive listing and sync it into category cname.

    f is any iterable of listing lines (an open file or a pipe).  Lines
    that look like directory entries (start with 'd', five fields, a
    10-character permission field) are registered as directories; every
    other line is offered to add_file_to_directory(), which ignores
    anything that is not a well-formed file entry.
    """
    category = Category.byName(cname)
    category_directories = {}
    for line in f:
        line = line.strip()
        fields = line.split()
        if line.startswith('d') and len(fields) == 5 and len(fields[0]) == 10: # good guess it's a directory line
            # The top-level '.' entry is registered like any other
            # directory (it maps to the category's topdir name); files
            # directly under it need that entry to exist.
            dname, category_directories = make_one_directory(line, category, category.topdir.name, category_directories)
        else:
            add_file_to_directory(line, category.topdir.name, category_directories)
    sync_category_directories(category, category_directories)



def sync_directories_using_rsync(rsyncpath, cname, extra_rsync_args=None):
    """Sync category cname from the recursive listing of an rsync URL.

    Runs "rsync -r" against rsyncpath (plus extra_rsync_args, if given)
    and feeds its standard output to parse_rsync_listing().  If the
    command cannot be started a message is printed and the function
    returns without syncing.
    """
    # NOTE: the command is run through the shell; rsyncpath and
    # extra_rsync_args come from the configuration file and must be trusted.
    cmd = "rsync --temp-dir=/tmp -r --exclude=.snapshot --exclude='*.~tmp~'"
    if extra_rsync_args is not None:
        cmd += ' ' + extra_rsync_args
    cmd += ' ' + rsyncpath
    try:
        devnull = open('/dev/null', 'r')
    except (IOError, OSError):
        print("Category %s: Unable to run %s" % (cname, cmd))
        return
    try:
        try:
            p = Popen(cmd, shell=True, stdin=devnull, stdout=PIPE, bufsize=1, close_fds=True)
        except (OSError, ValueError):
            print("Category %s: Unable to run %s" % (cname, cmd))
            return
        try:
            parse_rsync_listing(cname, p.stdout)
        finally:
            # close our end of the pipe and reap the child so it does not
            # linger as a zombie, even when parsing fails part-way
            p.stdout.close()
            p.wait()
    finally:
        devnull.close()

def sync_directories_from_file(filename, cname):
    """Sync category cname from a saved rsync listing stored in filename."""
    f = open(filename, 'r')
    try:
        parse_rsync_listing(cname, f)
    finally:
        # close the listing even if parsing raises
        f.close()


def sync_directories_from_directory(directory, cname, excludes=None):
    """Sync category cname by walking a local directory tree.

    directory is the filesystem path to walk; it must contain the
    category's topdir name, since database directory names are taken
    from the walked path starting at that name.  excludes is an optional
    list of regular expressions; any directory whose path matches one of
    them (via re.match), or one of the built-in .snapshot/.~tmp~
    patterns, is skipped together with its subdirectories.

    Per-file stat() calls are only made for directories whose ctime
    differs from the one stored in the database.

    Raises ValueError if a walked path does not contain the topdir name.
    """
    if excludes is None:
        excludes = []
    # drop trailing slashes from path
    directory = directory.rstrip('/')
    category = Category.byName(cname)
    category_directories = {}
    topdirName = category.topdir.name
    # fixme
    # if directory looks like /path/to/something/pub/fedora/linux
    # and topdir.name is pub/fedora/linux,
    # the walked dirpaths look like /path/to/something/pub/fedora/linux/...
    # and the matching Directory names are the part starting at
    # pub/fedora/linux.
    stdexcludes = [r'.*\.snapshot', r'.*/\.~tmp~']
    exclude_res = [re.compile(e) for e in stdexcludes + list(excludes)]
    # "other" needs both read and execute on a directory to use it, the
    # same requirement the rsync listing path applies ('r.x' for other)
    world_rx = stat.S_IROTH | stat.S_IXOTH

    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
        skip = False
        for rx in exclude_res:
            if rx.match(dirpath):
                skip = True
                break
        if skip:
            print("excluding %s" % (dirpath))
            # exclude all its subdirs too
            dirnames[:] = []
            continue

        # avoid disappearing files
        try:
            s = os.stat(dirpath)
        except OSError:
            continue
        ctime = s[stat.ST_CTIME]

        dname = dirpath[dirpath.index(topdirName):].rstrip('/')
        try:
            d_ctime = Directory.byName(dname).ctime
        except SQLObjectNotFound:
            # we'll need to create it
            d_ctime = 0

        # a real bool, so it compares cleanly against Directory.readable
        readable = (s.st_mode & world_rx) == world_rx
        if not readable:
            unreadable_dirs[dname] = True
        isRepo = 'repodata' in dirnames

        changed = (d_ctime != ctime)
        if changed:
            print("%s has changed" % dname)
        category_directories[dname] = {'files':{}, 'isRepository':isRepo, 'readable':readable, 'ctime':ctime, 'changed':changed}

        # skip per-file stat()s if the directory hasn't changed
        if changed:
            for f in filenames:
                try:
                    fs = os.stat(os.path.join(dirpath, f))
                except OSError:
                    continue
                category_directories[dname]['files'][f] = {'size':str(fs.st_size),
                                                           'stat':fs[stat.ST_CTIME]}

    sync_category_directories(category, category_directories)

def main():
    """Command-line entry point: update-master-directory-list dev.cfg [rootdir]

    Loads the TurboGears configuration named by the first argument,
    optionally overrides the module-level rootdir with the second, and
    then syncs every entry of the 'umdl.master_directories' setting
    according to its 'type' ('rsync', 'file' or 'directory').

    Exits with status 1 on bad usage or when another instance holds the
    pidfile; returns 0 on completion.
    """
    # rootdir is read by the checksum/repomd helpers at module level, so
    # the override must rebind the global, not a local
    global rootdir

    # look on the command line for a desired config file
    if len(sys.argv) < 2:
        print("usage: update-master-directory-list dev.cfg [rootdir]")
        sys.exit(1)
    turbogears.update_config(configfile=sys.argv[1], modulename="mirrormanager.config")
    if len(sys.argv) >= 3:
        rootdir = sys.argv[2]

    if manage_pidfile(pidfile):
        print("another instance is running, try again later.")
        sys.exit(1)

    try:
        print("Starting umdl %s" % (datetime.datetime.utcnow().isoformat()))
        for i in config.get('umdl.master_directories'):
            options = i.get('options')
            kind = i['type']

            if kind == 'rsync':
                sync_directories_using_rsync(i['url'], i['category'], options)
            elif kind == 'file':
                sync_directories_from_file(i['url'], i['category'])
            elif kind == 'directory':
                excludes = i.get('excludes', [])
                sync_directories_from_directory(i['path'], i['category'], excludes)
    finally:
        # always release the pidfile, or a failed run blocks all later ones
        remove_pidfile(pidfile)
    print("Ending umdl %s" % (datetime.datetime.utcnow().isoformat()))

    return 0

# When run as a script, call main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
