#!/usr/bin/python
#
# Copyright (c) 2015 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pwd
import sys
import stat
import errno
import xattr
import cPickle as pickle
import multiprocessing

from optparse import OptionParser
from gluster.swift.common.utils import write_metadata, SafeUnpickler, \
    METADATA_KEY, MAX_XATTR_SIZE


ORIGINAL_EUID = os.geteuid()
NOBODY_UID = pwd.getpwnam('nobody').pw_uid


def print_msg(s):
    """
    Print ``s`` only when verbose output was requested on the command line.

    The parsed command-line options live in the module-level ``options``
    name, which is bound only when this file runs as a script. If that name
    is absent (for example when the module is imported), the message is
    silently dropped instead of raising NameError.

    :param s: message to print
    """
    # Look the name up dynamically: it may legitimately be undefined.
    opts = globals().get('options')
    if getattr(opts, 'verbose', False):
        print(s)


def clean_metadata(path, key_count):
    """
    Remove the first ``key_count`` metadata xattrs from ``path``.

    The xattr names are METADATA_KEY followed by the key index, except for
    index 0 which carries no suffix. Because the caller already knows how
    many keys exist, no extra probing removexattr() call is needed.

    IOError with errno ENOENT, ESTALE or ENODATA (file or metadata not
    found) is ignored. Any other IOError is reported through print_msg()
    and removal continues with the next key.
    """
    ignorable = (errno.ENOENT, errno.ESTALE, errno.ENODATA)
    for index in xrange(key_count):
        # The first key has no numeric suffix.
        suffix = index or ''
        try:
            xattr.removexattr(path, '%s%s' % (METADATA_KEY, suffix))
        except IOError as err:
            if err.errno in ignorable:
                continue
            print_msg("xattr.removexattr(%s, %s%s) failed: %s" %
                      (path, METADATA_KEY, suffix, err.errno))


def _unpickle_unprivileged(metastr):
    """
    Unpickle ``metastr`` with the effective UID temporarily set to 'nobody'.

    The effective UID is restored in a ``finally`` clause, so a failed
    unpickle can no longer leave the calling process running as 'nobody'.

    :param metastr: raw metadata string read from xattrs
    :returns: the unpickled dict, or None when unpickling fails or the
              result is not a dict
    :raises OSError: if changing the effective UID fails
    """
    # NOTE: metastr comes from on-disk xattrs and must be treated as
    # untrusted input. It is only ever loaded through SafeUnpickler, never
    # through pickle.loads() directly.
    os.seteuid(NOBODY_UID)  # Drop privileges
    try:
        metadata = SafeUnpickler.loads(metastr)
    except (pickle.UnpicklingError, EOFError, AttributeError,
            IndexError, ImportError, AssertionError):
        return None
    finally:
        os.seteuid(ORIGINAL_EUID)  # Restore privileges, even on failure

    # Explicit type check instead of ``assert``, which is stripped when
    # Python runs with -O.
    if not isinstance(metadata, dict):
        return None
    return metadata


def process_object(path):
    """
    Migrate the metadata xattrs of a single file or directory.

    The metadata is read from the xattrs named METADATA_KEY, METADATA_KEY1,
    ... and then handled according to its format:

    * pickled (protocol 2, dict): unpickled and rewritten through
      write_metadata(); "<path> MIGRATED" is reported. If unpickling fails
      or does not yield a dict, the metadata xattrs are removed.
    * starts with "{" and ends with "}": treated as already serialized and
      left untouched; "<path> SKIPPED" is reported.
    * anything else: treated as malformed and removed; "<path> CLEANED" is
      reported.

    Objects without metadata are ignored.

    :param path: path of the file or directory to process
    :raises IOError: if rewriting the metadata fails with an errno other
                     than ENOENT or ESTALE
    """
    metastr = ''
    key_count = 0
    try:
        while True:
            metastr += xattr.getxattr(path, '%s%s' %
                                      (METADATA_KEY, (key_count or '')))
            key_count += 1
            if len(metastr) < MAX_XATTR_SIZE:
                # Prevent further getxattr calls
                break
    except IOError as err:
        # A missing file or key simply ends the read; whatever has been
        # accumulated so far is processed below.
        if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
            print_msg("xattr.getxattr(%s, %s%s) failed: %s" %
                      (path, METADATA_KEY, (key_count or ''), err.errno))

    if not metastr:
        return

    # '\x80\x02' is the pickle protocol 2 header, '}' the empty-dict opcode
    # and '.' the pickle STOP opcode.
    if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
        # It's pickled. If unpickling is successful write back the
        # metadata by serializing it.
        metadata = _unpickle_unprivileged(metastr)
        if metadata is None:
            clean_metadata(path, key_count)
            return
        try:
            # Remove existing metadata first before writing new metadata
            clean_metadata(path, key_count)
            write_metadata(path, metadata)
            print_msg("%s MIGRATED" % (path))
        except IOError as err:
            # The object may have been deleted in the meantime.
            if err.errno not in (errno.ENOENT, errno.ESTALE):
                raise
    elif metastr.startswith("{") and metastr.endswith("}"):
        # It's not pickled and is already serialized, just return
        print_msg("%s SKIPPED" % (path))
    else:
        # Metadata is malformed
        clean_metadata(path, key_count)
        print_msg("%s CLEANED" % (path))


def walktree(top, pool, root=True):
    """
    Recursively walk the filesystem tree and migrate metadata of each object
    found. Unlike os.walk(), this method performs a stat sys call on a
    file/directory at most only once.

    Every directory and regular file found is submitted to ``pool`` as a
    process_object() task. Symbolic links are neither processed nor
    followed, so a link cannot create a loop or lead outside the tree.

    :param top: directory to walk
    :param pool: object providing apply_async(func, args), e.g. a
                 multiprocessing.Pool
    :param root: True when ``top`` is the root of a volume. The root itself
                 is then processed too, and the entries ".trashcan",
                 ".glusterfs", "async_pending" and "tmp" directly below it
                 are skipped.
    :raises OSError: if listing or stat'ing fails for a reason other than
                     the entry having disappeared (ENOENT, ESTALE)
    """

    if root:
        # The root of volume is account which also contains metadata
        pool.apply_async(process_object, (top, ))

    try:
        entries = os.listdir(top)
    except OSError as err:
        # A subdirectory may be removed between being stat'ed by the parent
        # call and being listed here; treat that like any vanished entry.
        # A failure on the root itself is still raised.
        if not root and err.errno in (errno.ENOENT, errno.ESTALE):
            return
        raise

    for f in entries:
        if root and f in (".trashcan", ".glusterfs", "async_pending", "tmp"):
            continue
        path = os.path.join(top, f)
        try:
            # lstat() does not follow symlinks, so S_ISLNK below can
            # actually match (with stat() it never would).
            s = os.lstat(path)
        except OSError as err:
            if err.errno in (errno.ENOENT, errno.ESTALE):
                continue
            raise
        if stat.S_ISLNK(s.st_mode):
            continue
        if stat.S_ISDIR(s.st_mode):
            pool.apply_async(process_object, (path, ))
            # Recurse into directory
            walktree(path, pool, root=False)
        elif stat.S_ISREG(s.st_mode):
            pool.apply_async(process_object, (path, ))


if __name__ == '__main__':
    # Script entry point: parse the command line, then walk every given
    # volume mount path, handing each object to a pool of worker processes.

    usage = "usage: %prog [options] volume1_mountpath volume2_mountpath..."
    description = """Account, container and object metadata are stored as \
extended attributes of files and directories. This utility migrates metadata \
stored in pickled format to JSON format."""
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="store_true", default=False,
                      help="Print object paths as they are processed.")
    # ``options`` is bound at module level here; print_msg() reads it.
    (options, mount_paths) = parser.parse_args()

    if not mount_paths:
        # Call form of print behaves the same for a single string and
        # matches the style used in print_msg().
        print("Mountpoint path(s) missing.")
        parser.print_usage()
        sys.exit(-1)

    # The pool is created only after ``options`` has been parsed.
    pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)

    for path in mount_paths:
        if os.path.isdir(path):
            walktree(path, pool)
        else:
            # Report bad arguments instead of silently ignoring them.
            sys.stderr.write("%s is not a directory, skipping.\n" % path)

    # No more tasks will be submitted; wait for the queued ones to finish.
    pool.close()
    pool.join()
