test_loopy.py

                lp.GlobalArg("c", shape=lp.auto),
                lp.ValueArg("n"),
                ],
            assumptions="n>=1")

    print knl
    print lp.CompiledKernel(ctx, knl).get_highlighted_code()


def test_arg_guessing(ctx_factory):
    """Build a kernel without declaring any arguments and print it.

    No argument list is passed to ``lp.make_kernel``; going by the test
    name, loopy is expected to work out ``a``, ``b``, ``c`` and ``n`` from
    the instructions.  There are no assertions: the test passes when
    kernel construction and code generation finish without raising.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i,j<n }",
            ],
            """
                a = 1.5 + sum((i,j), i*j)
                b[i, j] = i*j
                c[i+j, j] = b[j,i]
                """,
            assumptions="n>=1")

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code())


def test_arg_guessing_with_reduction(ctx_factory):
    """Build an undeclared-argument kernel that reduces over an array.

    Same setup as a plain argument-guessing kernel, with one extra
    instruction, ``d = 1.5 + sum((i,j), b[i,j])``, whose reduction reads
    the array ``b``.  No argument list is passed to ``lp.make_kernel``.
    There are no assertions: the test passes when kernel construction and
    code generation finish without raising.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    #logging.basicConfig(level=logging.DEBUG)
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i,j<n }",
            ],
            """
                a = 1.5 + sum((i,j), i*j)
                d = 1.5 + sum((i,j), b[i,j])
                b[i, j] = i*j
                c[i+j, j] = b[j,i]
                """,
            assumptions="n>=1")

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code())

# }}}


def test_nonlinear_index(ctx_factory):
    """Build a kernel whose write subscript is nonlinear in an iname.

    The single instruction stores to ``a[i*i]``.  There are no
    assertions: the test passes when kernel construction and code
    generation finish without raising.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i,j<n }",
            ],
            """
                a[i*i] = 17
                """,
            [
                lp.GlobalArg("a", shape="n"),
                lp.ValueArg("n"),
                ],
            assumptions="n>=1")

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code())


def test_triangle_domain(ctx_factory):
    """Build a kernel over a triangular loop domain.

    The domain restricts ``0<=i,j<n`` further by ``i <= j``.  There are
    no assertions: the test passes when kernel construction and code
    generation finish without raising.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i,j<n and i <= j}",
            ],
            "a[i,j] = 17",
            assumptions="n>=1")

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code())


def test_offsets_and_slicing(ctx_factory):
    """Run ``b[i,j] = 2*a[i,j]`` on sliced sub-arrays of larger arrays.

    Two random 20x20 float64 device arrays are created.  The kernel is
    invoked on a 7x5 slice of each (taken at different positions), and
    the complete ``b`` array is then compared against a host-side
    reference in which only the sliced region was overwritten.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    n = 20

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i<n and 0<=j<m }",
            ],
            """
                b[i,j] = 2*a[i,j]
                """,
            assumptions="n>=1 and m>=1",
            default_offset=lp.auto)

    knl = lp.tag_data_axes(knl, "a,b", "stride:auto,stride:1")

    cknl = lp.CompiledKernel(ctx, knl)

    # Host copies are taken before the kernel runs so they can serve as
    # the reference afterwards.
    a_full = cl.clrandom.rand(queue, (n, n), np.float64)
    a_full_h = a_full.get()
    b_full = cl.clrandom.rand(queue, (n, n), np.float64)
    b_full_h = b_full.get()

    a_sub = (slice(3, 10), slice(5, 10))
    a = a_full[a_sub]

    # Same extent as a_sub, shifted by (3, 4).
    b_sub = (slice(3+3, 10+3), slice(5+4, 10+4))
    b = b_full[b_sub]

    # Expected result: only the b_sub region of b changes.
    b_full_h[b_sub] = 2*a_full_h[a_sub]

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(cknl.get_highlighted_code({"a": a.dtype}))
    cknl(queue, a=a, b=b)

    import numpy.linalg as la
    assert la.norm(b_full.get() - b_full_h) < 1e-13


def test_vector_ilp_with_prefetch(ctx_factory):
    """Check the barrier count of a split, ILP-tagged, prefetching kernel.

    ``i`` is split by 128 (inner part tagged ``l.0``); the resulting
    ``i_outer`` is split by 4 (outer tagged ``g.0``, inner tagged
    ``ilp``); then a prefetch of ``a`` over ``i_inner`` and
    ``i_outer_inner`` is added.  The generated code must contain the
    text ``barrier`` exactly once.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    import re

    ctx = ctx_factory()

    # Tests that comma'd arguments interoperate with
    # argument guessing.
    kernel_args = [
        lp.GlobalArg("out,a", np.float32, shape=lp.auto),
        "...",
    ]
    knl = lp.make_kernel(
            ctx.devices[0],
            "{ [i]: 0<=i<n }",
            "out[i] = 2*a[i]",
            kernel_args)

    knl = lp.split_iname(knl, "i", 128, inner_tag="l.0")
    knl = lp.split_iname(knl, "i_outer", 4, outer_tag="g.0", inner_tag="ilp")
    knl = lp.add_prefetch(knl, "a", ["i_inner", "i_outer_inner"])

    compiled = lp.CompiledKernel(ctx, knl)
    compiled.cl_kernel_info()

    generated_src = compiled.get_code()
    barrier_hits = re.findall("barrier", generated_src)
    assert len(barrier_hits) == 1


def test_convolution_like(ctx_factory):
    """Auto-test a split variant of a convolution-style kernel.

    The kernel sums ``img[im_x-f_x, im_y-f_y] * f[f_w+f_x, f_w+f_y]``
    over ``f_x`` and ``f_y``.  A variant with ``im_x`` split by 16 (inner
    part tagged ``l.0``) is checked against the unsplit kernel with
    ``lp.auto_test_vs_ref`` for ``im_w = im_h = 1024`` and ``f_w = 7``.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0],
        "{ [im_x, im_y, f_x, f_y]: -f_w <= f_x,f_y <= f_w \
            and f_w <= im_x < im_w-f_w and f_w <= im_y < im_h-f_w }",
        """
        out[im_x-f_w, im_y-f_w] = sum((f_x, f_y), \
            img[im_x-f_x, im_y-f_y] * f[f_w+f_x, f_w+f_y])
        """,
        # All three arrays are float64 with automatically determined shape.
        [lp.GlobalArg(arg_name, np.float64, shape=lp.auto)
            for arg_name in ("f", "img", "out")] + ["..."],
        assumptions="f_w>=1 and im_w, im_h >= 1")

    # The untransformed kernel doubles as the reference.
    split_knl = lp.split_iname(knl, "im_x", 16, inner_tag="l.0")

    lp.auto_test_vs_ref(knl, ctx, split_knl,
            parameters=dict(im_w=1024, im_h=1024, f_w=7))


def test_c_instruction(ctx_factory):
    """Build a kernel that mixes an ``lp.CInstruction`` with a normal one.

    The C instruction assigns ``sin((float) i)`` to the float32
    temporary ``x``, which the second instruction stores into
    ``a[i*i]``.  ``i`` is split by 128 with ``g.0``/``l.0`` tags.  There
    are no assertions: the test passes when kernel construction and code
    generation finish without raising.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    #logging.basicConfig(level=logging.DEBUG)
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i,j<n }",
            ],
            [
                lp.CInstruction("i", """
                    x = sin((float) i);
                    """, assignees="x"),
                "a[i*i] = x",
                ],
            [
                lp.GlobalArg("a", shape="n"),
                lp.ValueArg("n"),
                lp.TemporaryVariable("x", np.float32),
                ],
            assumptions="n>=1")

    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code())


def test_dependent_domain_insn_iname_finding(ctx_factory):
    """Check iname detection when one domain's bounds depend on another.

    The bounds of ``isrc`` (``isrc_start``, ``isrc_end``) are computed by
    instructions indexed through ``isrc_box``.  The test asserts that the
    ``set_strength`` instruction, which only subscripts with ``isrc``, is
    nevertheless reported as nested inside ``isrc_box``, and then prints
    the generated code for an explicit set of argument dtypes.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[isrc_box]: 0<=isrc_box<nsrc_boxes}",
            "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}",
            ],
            """
                <> src_ibox = source_boxes[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]
                <> strength = strengths[isrc] {id=set_strength}
                """,
            [
                lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                    None, shape=None),
                lp.GlobalArg("strengths",
                    None, shape="nsources"),
                "..."])

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    assert "isrc_box" in knl.insn_inames("set_strength")

    # The arguments were declared with dtype None, so dtypes are passed
    # explicitly here.
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code(
            dict(
                source_boxes=np.int32,
                box_source_starts=np.int32,
                box_source_counts_nonchild=np.int32,
                strengths=np.float64,
                nsources=np.int32,
                )))


def test_inames_deps_from_write_subscript(ctx_factory):
    """Check that a write subscript contributes to an instruction's inames.

    The ``myred`` instruction writes to ``a[src_ibox]``, where
    ``src_ibox`` is a temporary computed from ``source_boxes[i]``.  The
    test asserts that ``i`` is among the inames of ``myred``.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i,j<n}",
            ],
            """
                <> src_ibox = source_boxes[i]
                <int32> something = 5
                a[src_ibox] = sum(j, something) {id=myred}
                """,
            [
                # NOTE(review): box_source_starts and
                # box_source_counts_nonchild are not referenced by the
                # instructions above -- possibly copied from another test.
                lp.GlobalArg("box_source_starts,box_source_counts_nonchild,a",
                    None, shape=None),
                "..."])

    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    assert "i" in knl.insn_inames("myred")


def test_split_reduction(ctx_factory):
    """Apply ``lp.split_reduction`` to a three-iname reduction.

    The kernel computes ``b = sum((i,j,k), a[i,j,k])``; the reduction is
    then split with ``lp.split_reduction(knl, "j,k", "out")`` and the
    result is printed.  Nothing is asserted yet (see the FIXME below).

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j,k]: 0<=i,j,k<n}",
            ],
            """
                b = sum((i,j,k), a[i,j,k])
                """,
            [
                # NOTE(review): box_source_starts and
                # box_source_counts_nonchild are not referenced by the
                # instruction above -- possibly copied from another test.
                lp.GlobalArg("box_source_starts,box_source_counts_nonchild,a",
                    None, shape=None),
                "..."])

    knl = lp.split_reduction(knl, "j,k", "out")
    # Parenthesized single-argument print is valid on both Python 2 and 3.
    print(knl)
    # FIXME: finish test


if __name__ == "__main__":
    # With no command-line argument, hand this file to py.test.  Otherwise
    # the first argument is executed as Python source in this module's
    # namespace (presumably to call a single test by hand).
    # NOTE(review): exec runs arbitrary code taken from the command line;
    # acceptable only as a developer convenience.
    if len(sys.argv) <= 1:
        from py.test.cmdline import main
        main([__file__])
    else:
        exec(sys.argv[1])

# vim: foldmethod=marker