import numpy as np
import numpy.linalg as la  # noqa
import pyopencl as cl
import pyopencl.array  # noqa
import pyopencl.clrandom  # noqa
import loopy as lp

from pytest import approx


# Module-level one-element caches. Each list starts empty; the matching
# getter appends its object on first use and returns index 0 afterwards.
_WENO_PRG = []  # filled by get_weno_program()
_QUEUE = []  # filled by get_queue()

def get_gpu_transformed_weno():
    """Return the WENO program with ``compute_flux_derivatives`` transformed.

    Starting from the program returned by :func:`get_weno_program`, the
    ``compute_flux_derivatives`` kernel is modified as follows and then put
    back into the program:

    * ``nx``, ``ny`` and ``nz`` are assumed to be positive,
    * three temporaries are placed in the global address space,
    * every ``i*``/``j*`` iname is split by 16, with the outer part tagged
      ``g.0``/``g.1`` and the inner part tagged ``l.0``/``l.1``,
    * the ``delta_*`` assignments are turned into substitution rules,
    * a barrier is added between each pair of consecutive tagged stages.
    """
    prg = get_weno_program()

    knl = lp.assume(prg["compute_flux_derivatives"],
            "nx > 0 and ny > 0 and nz > 0")

    global_temporaries = (
        "flux_derivatives_generalized",
        "generalized_fluxes",
        "weno_flux_tmp",
    )
    for tmp_name in global_temporaries:
        knl = lp.set_temporary_scope(knl, tmp_name, lp.AddressSpace.GLOBAL)

    # (iname prefix, outer tag, inner tag) for the two parallelized axes.
    axis_tags = (
        ("i", "g.0", "l.0"),
        ("j", "g.1", "l.1"),
    )
    for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]:
        for prefix, outer_tag, inner_tag in axis_tags:
            knl = lp.split_iname(knl, prefix + suffix, 16,
                    outer_tag=outer_tag, inner_tag=inner_tag)

    for var_name in ("delta_xi", "delta_eta", "delta_zeta"):
        knl = lp.assignment_to_subst(knl, var_name)

    # Instruction tags of the stages, in the order they must be separated.
    stages = [
        "to_generalized",
        "flux_x_compute",
        "flux_x_diff",
        "flux_y_compute",
        "flux_y_diff",
        "flux_z_compute",
        "flux_z_diff",
        "from_generalized",
    ]
    for before, after in zip(stages, stages[1:]):
        knl = lp.add_barrier(knl, "tag:" + before, "tag:" + after)

    prg = prg.with_kernel(knl)

    # FIXME: These should work, but don't
    # FIXME: Undo the hand-inlining in WENO.F90
    #prg = lp.inline_callable_kernel(prg, "convert_to_generalized")
    #prg = lp.inline_callable_kernel(prg, "convert_from_generalized")

    # Debugging aid, disabled by default: prints one kernel and then aborts
    # via a deliberate ZeroDivisionError.
    if 0:
        print(prg["convert_to_generalized_frozen"])
        1/0

    return prg


def get_queue(ctx_factory):
    """Return the shared command queue, creating it on first use.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
        It is only invoked when no queue has been cached yet; on later
        calls the argument is ignored and the cached queue is returned.
    """
    if _QUEUE:
        return _QUEUE[0]

    queue = cl.CommandQueue(ctx_factory())
    _QUEUE.append(queue)
    return queue


def with_root_kernel(prg, root_name):
    """Return a copy of *prg* in which *root_name* is the host entry point.

    The copy is named *root_name*. Every :class:`loopy.LoopKernel` in it
    that is currently marked as called from the host has that flag cleared,
    and finally the kernel *root_name* (taken from the original *prg*) is
    stored with the flag set.
    """
    # FIXME This is a little less beautiful than it could be
    result = prg.copy(name=root_name)

    for knl_name in prg:
        candidate = result[knl_name]
        if not isinstance(candidate, lp.LoopKernel):
            continue
        if not candidate.is_called_from_host:
            continue
        demoted = candidate.copy(is_called_from_host=False)
        result = result.with_kernel(demoted)

    root_knl = prg[root_name].copy(is_called_from_host=True)
    return result.with_kernel(root_knl)


def get_weno_program():
    """Return the loopy program parsed from ``WENO.F90``, caching the result.

    On the first call the file ``WENO.F90`` is opened (the path is relative,
    so it is resolved against the current working directory), parsed with
    :func:`loopy.parse_transformed_fortran`, and stored in ``_WENO_PRG``.
    Later calls return the stored program without touching the file.
    """
    if not _WENO_PRG:
        fn = "WENO.F90"

        with open(fn, "r") as infile:
            fortran_source = infile.read()

        parsed = lp.parse_transformed_fortran(fortran_source, filename=fn)
        _WENO_PRG.append(parsed)

    return _WENO_PRG[0]

def f_array(queue, *shape):
    """Return a random float32 device array of the given *shape*.

    The data is drawn with :func:`numpy.random.random_sample`, converted to
    ``float32``, copied into Fortran (column-major) order, and transferred
    to the device associated with *queue*.

    :arg queue: command queue used for the host-to-device transfer.
    :arg shape: array extents, one positional argument per axis.
    """
    samples = np.random.random_sample(shape)
    host_ary = samples.astype(np.float32).copy(order="F")
    return cl.array.to_device(queue, host_ary)

def compute_flux_derivatives(ctx_factory,
        nvars, ndim, nx, ny, nz,
        states, fluxes, metrics, metric_jacobians):
    """Run the ``compute_flux_derivatives`` kernel and return its output.

    The kernel from :func:`get_weno_program` is given the assumption that
    ``nx``, ``ny`` and ``nz`` are positive, and three of its temporaries are
    placed in the global address space before the program is executed on the
    shared queue.

    :arg ctx_factory: zero-argument callable returning an OpenCL context;
        forwarded to :func:`get_queue`.
    :arg nvars: passed to the program and used as the first output extent.
    :arg ndim: passed to the program and used as the second output extent.
    :arg nx, ny, nz: used to size the output; each of the last three output
        extents is the respective value plus 6 (presumably padding/halo
        cells -- confirm against WENO.F90).
    :arg states, fluxes, metrics, metric_jacobians: forwarded unchanged to
        the program as keyword arguments.
    :returns: the ``flux_derivatives`` output copied back to the host; it is
        allocated as a float32, Fortran-ordered array of shape
        ``(nvars, ndim, nx+6, ny+6, nz+6)``.
    """
    queue = get_queue(ctx_factory)

    prg = get_weno_program()
    knl = lp.assume(prg["compute_flux_derivatives"],
            "nx > 0 and ny > 0 and nz > 0")

    for tmp_name in ("flux_derivatives_generalized",
                     "generalized_fluxes",
                     "weno_flux_tmp"):
        knl = lp.set_temporary_scope(knl, tmp_name, lp.AddressSpace.GLOBAL)

    prg = prg.with_kernel(knl)

    out_shape = (nvars, ndim, nx+6, ny+6, nz+6)
    flux_derivatives_dev = cl.array.empty(
            queue, out_shape, dtype=np.float32, order="F")

    prg(queue, nvars=nvars, ndim=ndim,
            states=states, fluxes=fluxes, metrics=metrics,
            metric_jacobians=metric_jacobians,
            flux_derivatives=flux_derivatives_dev)

    return flux_derivatives_dev.get()
