#! /usr/bin/python3
#
# Copyright (C) 2016 Citrix Systems R&D Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only. with the special
# exception on linking described in file LICENSE.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.

import os
import sys
import socket
import errno
import stat
import pwd
import grp
import subprocess
import ctypes
import ctypes.util
import os
from resource import getrlimit, RLIMIT_CORE, RLIMIT_FSIZE, setrlimit

import xen.lowlevel.xs as xs


#
# Constants from Xen's public/hvm/e820.h
#
# 0xf0000000 is 3.75 GiB.
HVM_BELOW_4G_RAM_END = 0xf0000000
# The low MMIO region starts where low RAM ends ...
HVM_BELOW_4G_MMIO_START = HVM_BELOW_4G_RAM_END
# ... and extends up to the 4 GiB boundary (0x10000000 bytes, i.e. 256 MiB).
HVM_BELOW_4G_MMIO_LENGTH = (1 << 32) - HVM_BELOW_4G_MMIO_START

# NOTE(review): dic_mac is not referenced by any other code visible in this
# file -- confirm whether it is still needed.
dic_mac = {}
# File descriptors collected by main(): they are handed to QEMU through
# pass_fds and closed in the wrapper by close_fds().
open_fds = []

# Module-wide handle used by the xenstore_read/write/ls helpers.
xenstore = xs.xs()

# Set cgroup_slice to the name of the cgroup slice the qemu-dm process
# should live in.
#  - None means leave in the same slice as the parent process.
#  - '' means move it into the default slice.
#  - 'system.slice' means move it into the system slice, etc.
# If the nominated slice does not already exist, the process will be
# left in its parent's slice.
cgroup_slice = 'vm.slice'

# Flag bits combined by prepare_exec() and passed to unshare().
# NOTE(review): values presumably mirror Linux's <linux/sched.h> -- confirm.
CLONE_NEWNS = 0x00020000 # mount namespace
CLONE_NEWNET = 0x40000000 # network namespace
CLONE_NEWIPC = 0x08000000 # IPC namespace

def unshare(flags):
    """Call the C library's unshare() with *flags* through ctypes.

    flags -- integer bit mask, passed to the C function as a C int
             (the CLONE_NEW* constants of this module are meant for it).

    Returns None on success.  Raises OSError, carrying the C errno and its
    message, when the C call returns a negative value.
    """
    c_library = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
    signature = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, use_errno=True)
    c_unshare = signature(('unshare', c_library))

    if c_unshare(flags) >= 0:
        return

    err = ctypes.get_errno()
    raise OSError(err, os.strerror(err))

def restrict_fsize():
    """Set both the soft and the hard RLIMIT_FSIZE of this process to 1 MiB."""
    one_mib = 1 << 20
    setrlimit(RLIMIT_FSIZE, (one_mib, one_mib))

def enable_core_dumps():
    """Set the soft core-dump size limit of this process to 64 MiB.

    The hard RLIMIT_CORE limit is left as it is unless it compares lower
    than 64 MiB, in which case it is set to 64 MiB as well.

    Returns the soft limit that was set, in bytes (always an int).

    Any exception raised by setrlimit() (for instance when the process is
    not allowed to raise its hard limit) propagates to the caller.
    """
    limit = 64 * 1024 * 1024
    hardlimit = getrlimit(RLIMIT_CORE)[1]

    # NOTE(review): an unlimited hard limit is reported as
    # resource.RLIM_INFINITY, which is -1 on Linux and therefore compares
    # below `limit`; the hard limit is then set to 64 MiB too -- confirm
    # that this is intended.
    if limit > hardlimit:
        hardlimit = limit

    # Apply the limit unconditionally.  Doing this only inside the branch
    # above left the soft limit untouched and returned None whenever the
    # hard limit was already large enough.
    setrlimit(RLIMIT_CORE, (limit, hardlimit))
    return limit

def xenstore_read(path):
    """Return what the module-level xenstore handle reads at *path*."""
    # The empty first argument is presumably the transaction handle
    # (i.e. "no transaction") -- see the xen.lowlevel.xs bindings.
    no_transaction = ""
    return xenstore.read(no_transaction, path)

def xenstore_write(path, value):
    """Write *value* at *path* through the module-level xenstore handle.

    Returns whatever the underlying write() call returns.
    """
    # The empty first argument is presumably the transaction handle
    # (i.e. "no transaction") -- see the xen.lowlevel.xs bindings.
    no_transaction = ""
    return xenstore.write(no_transaction, path, value)

def xenstore_ls(path):
    """Return what the module-level xenstore handle lists under *path*."""
    # The empty first argument is presumably the transaction handle
    # (i.e. "no transaction") -- see the xen.lowlevel.xs bindings.
    no_transaction = ""
    return xenstore.ls(no_transaction, path)

def close_fds():
    """Close every descriptor recorded in the module-level open_fds list.

    The list itself is not modified.  An OSError raised by os.close()
    propagates and stops the iteration.
    """
    for descriptor in open_fds:
        os.close(descriptor)

def prepare_exec():
    """Set up the execution environment for QEMU.

    main() passes this function to subprocess.Popen as preexec_fn.  In
    order, it:

      1. writes this process's pid into the tasks file of the cgroup slice
         named by cgroup_slice (skipped when that is None; a failure only
         produces a warning),
      2. sets the core dump limit via enable_core_dumps(),
      3. restricts the file size limit unless a serial port is redirected
         to a file,
      4. unshares the mount and IPC namespaces, and the network namespace
         too unless a serial port is redirected over the network,
      5. flushes stdout and stderr.

    Reads the module globals file_serial and network_serial, which main()
    assigns before starting QEMU.  Exceptions from steps 2-4 propagate.
    """

    if cgroup_slice is not None:
        # Move to nominated cgroup slice
        print("Moving to cgroup slice '%s'" % cgroup_slice)
        try:
            # Note the default slice uses /sys/fs/cgroup/cpu/tasks but
            # other.slice uses /sys/fs/cgroup/cpu/other.slice/tasks.
            # The context manager closes the file even if the write fails.
            with open("/sys/fs/cgroup/cpu/%s/tasks" % cgroup_slice, 'w') as g:
                g.write(str(os.getpid()))
        except IOError as e:
            print("Warning: writing pid to '%s' tasks file: %s" \
                % (cgroup_slice, e))

    core_dump_limit = enable_core_dumps()
    # Only log an actual number: formatting None with %d would raise and,
    # inside preexec_fn, prevent QEMU from being started at all.
    if core_dump_limit is not None:
        print("core dump limit: %d" % core_dump_limit)

    if not file_serial:
        restrict_fsize()

    flags = CLONE_NEWNS | CLONE_NEWIPC
    # serial redirection for debug may be using the network so don't
    # unshare in this case
    if not network_serial:
        flags |= CLONE_NEWNET
    unshare(flags)

    sys.stdout.flush()
    sys.stderr.flush()

def main(argv):
    """Translate the wrapper's command line into a QEMU invocation and run it.

    argv -- full argument vector including the program name.  It must
            contain "-xen-domid <id>".  argv[1] is not forwarded to QEMU;
            everything from argv[2] on is, after the rewriting below.

    Steps, in order:
      * determine the domain id and the start of the lower MMIO hole,
      * write the hvmloader/platform keys to xenstore,
      * build the "-machine" option and rewrite or strip wrapper-only
        options (--syslog, -priv, -trad-compat, -acpi, -videoram, -std-vga,
        -vgpu, -loadvm), recording the fds QEMU has to inherit,
      * add the display device, clipboard socket and debug console options,
      * unless "-priv" was given, prepare a chroot directory and add the
        -xen-domid-restrict / -chroot / -runas options,
      * run xcp-clipboardd, start QEMU (with prepare_exec as preexec_fn)
        and publish its pid in xenstore,
      * replace this process with "logger", which reads QEMU's combined
        stdout/stderr on its stdin.

    Does not return on success (the process image is replaced).  Raises
    SystemExit with a message when "-xen-domid <id>" is missing.
    """
    # Read by prepare_exec() in the forked child.
    global network_serial
    global file_serial

    print("Arguments: %s" % " ".join(argv[1:]))

    # Find "-xen-domid <id>".  The last element is excluded from the scan
    # because the flag needs a value after it.
    domid = None
    for idx, arg in enumerate(argv[:-1]):
        if arg == "-xen-domid":
            domid = int(argv[idx + 1])
            break
    if domid is None:
        raise SystemExit("Error: missing '-xen-domid <id>' argument")

    qemu_dm = '/usr/lib64/xen/bin/qemu-system-i386'
    qemu_args = ['qemu-dm-%d' % domid]

    mmio_start = HVM_BELOW_4G_MMIO_START
    # vGPU now requires extra space in lower MMIO hole by default
    if '-vgpu' in argv:
        mmio_start -= HVM_BELOW_4G_MMIO_LENGTH

    # read the size of lower MMIO hole provisioned by xenguest
    mmio_size = xenstore_read("/local/domain/%d/vm-data/mmio-hole-size" % domid)
    if mmio_size:
        mmio_start = (1 << 32) - int(mmio_size)

    # don't allow hvmloader to change the lower MMIO hole size
    xenstore_write("/local/domain/%d/hvmloader/allow-memory-relocate" % domid, "0")
    # prepare the correct set of ACPI tables in hvmloader
    xenstore_write("/local/domain/%d/platform/device-model" % domid, "qemu_xen")

    trad_compat = '-trad-compat' in argv
    igdpt = ''
    machine = 'pc-0.10' if trad_compat else 'pc-i440fx-2.10'

    qemu_args.extend(['-machine',
                      '%s,accel=xen,max-ram-below-4g=%lu,'
                      'suppress-vmdesc=on,'
                      'allow-unassigned=true,trad_compat=%s%s'
                      % (machine, mmio_start, trad_compat, igdpt)])

    qemu_args.extend(argv[2:])

    n = 0

    vga_type = 'cirrus-vga'
    vgamem_mb = 4
    vga_extra_props = ["addr=2"]
    vga_extra_props += ["romfile="]
    if trad_compat:
        vga_extra_props += ["rombar=1",
                            "subvendor_id=0x5853,subsystem_id=0x0001"]
    depriv = True

    network_serial = False
    file_serial = False

    # Walk the argument list in place.  Branches that delete the current
    # element `continue` without advancing n, because the next element has
    # moved into position n.
    while n < len(qemu_args):
        p = qemu_args[n]

        if p == "-netdev":
            # Remember any fd=N so that QEMU inherits the descriptor.
            params = qemu_args[n + 1].split(',')
            for param in params:
                if param.startswith('fd='):
                    open_fds.append(int(param.split('=')[1]))

        if p == "--syslog":
            del qemu_args[n]
            continue

        if p == "-priv":
            del qemu_args[n]
            depriv = False
            continue

        if p == "-serial":
            params = qemu_args[n + 1].split(',')
            if params[0].startswith('tcp'):
                print("serial: '%s'" % qemu_args[n + 1])
                network_serial = True
            elif params[0].startswith('file:'):
                file_serial = True

            # Step onto the option's value; it is looked at as an ordinary
            # argument on the next pass.
            n += 1
            continue

        if p == "-chardev":
            if qemu_args[n + 1].startswith('file,id=serial0,'):
                file_serial = True
            n += 1
            continue

        if p == "-trad-compat":
            del qemu_args[n]
            continue

        if p == "-acpi":
            del qemu_args[n]
            continue

        if p == "-videoram":
            vgamem_mb = int(qemu_args[n+1])
            # Drop both the flag and its value.
            del qemu_args[n]
            del qemu_args[n]
            continue

        if p == "-std-vga":
            vga_type = 'VGA'
            if trad_compat:
                vga_extra_props += ['qemu-extended-regs=false']
            del qemu_args[n]
            continue

        if p == "-vgpu":
            vga_type = 'vgpu'
            del qemu_args[n]
            continue

        if p == "-xengt":
            # Unlike -std-vga, this flag is kept on the command line.
            vga_type = 'VGA'
            if trad_compat:
                vga_extra_props += ['qemu-extended-regs=false']
            n += 1
            continue

        if p == "-loadvm":
            # Open the image here and hand QEMU the descriptor instead of
            # the path.
            loadvm_path = qemu_args[n+1]
            incoming_fd = os.open(loadvm_path, os.O_RDONLY)
            qemu_args[n] = "-incoming"
            qemu_args[n+1] = "fd:%d" % incoming_fd
            open_fds.append(incoming_fd)

        n += 1

    if vga_type == 'vgpu':
        qemu_args += ["-device", "vgpu"]
    else:
        qemu_args += ["-device", ",".join([vga_type, "vgamem_mb=%d" % vgamem_mb] \
                                          + vga_extra_props)]

    # s1 goes to QEMU, s2 to xcp-clipboardd.
    s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM)
    open_fds.append(s1.fileno())
    qemu_args += ["-vnc-clipboard-socket-fd", str(s1.fileno())]
    #qemu_args += ["-trace", "events=/root/trace"]
    #qemu_args += ["-monitor", "tcp:127.0.0.1:7777,server,nowait"]
    qemu_args += ["-chardev", "stdio,id=ovmf", "-device", "isa-debugcon,chardev=ovmf,iobase=0x402"]

    if depriv:
        root_dir = "/var/xen/qemu/root-{}".format(domid)
        # Both the directory and the device node may be left over from an
        # earlier run; only EEXIST is tolerated.
        try:
            os.makedirs(root_dir + "/dev", 0o755)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        try:
            os.mknod(root_dir + "/dev/null", 0o644 | stat.S_IFCHR,
                     os.makedev(1, 3))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        qemu_args += ["-xen-domid-restrict"]
        qemu_args += ["-chroot", root_dir]

        # Per-domain ids: the qemu_base uid/gid offset by the domain id.
        uid = pwd.getpwnam('qemu_base').pw_uid + domid
        gid = grp.getgrnam('qemu_base').gr_gid + domid
        qemu_args += ["-runas", "%d:%d" % (uid, gid)]

    xenstore_write("/libxl/%d/dm-version" % domid, "qemu_xen")

    print("Exec: %s %s" % (qemu_dm, " ".join(qemu_args)))

    clipboardd = '/opt/xensource/libexec/xcp-clipboardd'
    subprocess.call([clipboardd, "-d", str(domid), "-s", str(s2.fileno())],
                    pass_fds=[s2.fileno()])

    s2.close()

    # set up library preload path for qemu such that it can use jemalloc
    # NOTE(review): qemu_env is os.environ itself, not a copy, so these
    # settings also apply to this process and to the logger exec'd below --
    # confirm that this is intended.
    qemu_env = os.environ
    if "LD_PRELOAD" not in qemu_env:
        qemu_env["LD_PRELOAD"] = "/usr/lib64/libjemalloc.so.2"
    else:
        qemu_env["LD_PRELOAD"] = "/usr/lib64/libjemalloc.so.2:" + qemu_env["LD_PRELOAD"]
    qemu_env["MALLOC_CONF"] = "background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false"

    sys.stdout.flush()
    sys.stderr.flush()

    qemu = subprocess.Popen(qemu_args, executable=qemu_dm, env=qemu_env,
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            preexec_fn=prepare_exec, pass_fds=open_fds)

    xenstore_write("/local/domain/%d/qemu-pid" % domid, "%d" % qemu.pid)
    xenstore_write("/local/domain/%d/image/device-model-pid" % domid, "%d" % qemu.pid)

    # Redirect output from QEMU to logger
    os.dup2(qemu.stdout.fileno(), 0)
    qemu.stdout.close()

    # Close all unneeded fds
    open_fds.extend([1, 2])
    close_fds()

    os.execvp('logger', ['logger', '-p', 'daemon.info', '-t',
                         'qemu-dm-%d[%d]' % (domid, qemu.pid)])

if __name__ == '__main__':
    # Script entry point: whatever main() returns is used as the exit status.
    sys.exit(main(sys.argv))
