test_loopy.py

            """ % dict(condition=condition,
                       insn=insn,
                       extra=extra_insns if extra_insns else ''),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        if v is not None:
            knl = lp.set_options(knl, write_wrapper=True)

        queue = cl.CommandQueue(ctx)
        if check is not None:
            assert check(knl)
        elif exception is not None:
            with pytest.raises(exception):
                knl(queue, **kwargs)
        else:
            if not isinstance(answer, tuple):
                answer = (answer,)
            result = knl(queue, **kwargs)[1]
            for r, a in zip(result, answer):
                assert np.array_equal(r.flatten('C'), a)

    ans = np.zeros(12, dtype=np.int32)
    ans[7:] = 1
    from loopy.diagnostic import LoopyError
    # 1) test a conditional on a vector iname -- currently unimplemented as it
    # would require creating a 'shadow' vector iname temporary
    create_and_test('a[i] = 1', 'i > 6', ans, exception=LoopyError)
    # 2) condition on a vector array
    create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))
    # 3) condition on a vector temporary -- this is currently broken for the
    # same reason as #1
    create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = i < 6',
                    exception=LoopyError)
    # 4) condition on an assigned vector array, this should work as assignment to a
    # vector can be safely unrolled
    create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.zeros((3, 4), dtype=np.int32),
                    extra_insns='b[i] = i')
    # 5) a block of simple assignments, this should be seamlessly translated to
    # multiple vector if statements
    c_ans = np.ones(12, dtype=np.int32)
    c_ans[7:] = 0
    create_and_test('a[i] = 1\nc[i] = 0', 'b[i] > 6', (ans, c_ans), b=np.arange(
        12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))
    # 6) test a negated conditional
    ans_negated = np.invert(ans) + 2
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))
    # 7) test conditional on differing dtype
    ans_negated = np.invert(ans) + 2
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)))
    # 8) test conditional on differing dtype (float->int) and (int->float)
    ans_negated = np.invert(ans) + 2
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.float64).reshape((3, 4)))
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))
    # 9) test conditional on valuearg, the "test" here is that we can actually
    # generate the code
    create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)


@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Run a vectorized copy between differing vector dtypes.

    A global array ``a`` of dtype *rhs_dtype* is read into a temporary of
    dtype *lhs_dtype* inside a loop whose inner part is tagged ``vec``.
    The test only requires that the kernel can be built and executed; no
    output values are inspected.
    """
    ctx = ctx_factory()

    # width used both for the iname split and the array-axis split
    vec_width = 4

    kernel_data = [
        lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype),
        lp.TemporaryVariable('temp', dtype=lhs_dtype),
    ]
    loop_body = """
            for i
                temp = a[i]
            end
            """

    knl = lp.make_kernel(['{[i]: 0 <= i < 12}'], loop_body, kernel_data)

    # vectorize: split the loop, then split and tag the array to match
    knl = lp.split_iname(knl, 'i', vec_width, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 0, vec_width)
    knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

    host_a = np.zeros((12,), dtype=rhs_dtype)
    queue = cl.CommandQueue(ctx)
    knl(queue, a=host_a.reshape((3, 4)))


def test_vectorizability():
    """Check that a range of expressions can be vectorized.

    For each instruction template a small kernel over ``i`` in ``[0, 12)``
    is built, split by 4 with the inner iname tagged ``vec``, preprocessed
    and run through code generation.  Afterwards the test asserts that the
    first instruction is still nested in ``i_inner``, that the last axis of
    the first argument carries a :class:`VectorArrayDimTag`, and that
    ``i_inner`` still has a :class:`VectorizeTag`.
    """
    # check new vectorizability conditions
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        # Build, vectorize and generate code for a kernel whose loop body is
        # the single instruction *insn*.
        # NOTE: *exception* is accepted for signature compatibility but is
        # not used by this helper.
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        kwargs = dict(a=a)
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
            kwargs['b'] = b
        names = [d.name for d in data]

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(insn)s
            end
            """ % dict(insn=insn),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        # code generation must succeed; the generated text itself is not
        # inspected
        lp.generate_code_v2(knl).device_code()
        assert knl.instructions[0].within_inames & {'i_inner'}
        # (the original repeated this assertion verbatim; once is enough)
        assert isinstance(knl.args[0].dim_tags[-1], VectorArrayDimTag)
        assert filter_iname_tags_by_type(knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=()):
        # Immutable tuple defaults avoid the shared-mutable-default pitfall.
        # Each operator/function is exercised with a scalar right-hand
        # operand ('1') and with a vector one ('a[i]'); unary templates
        # simply ignore the 'rval' key.
        for op in op_list:
            template = 'a[i] = a[i] %(op)s %(rval)s' \
                if op not in unary_operators else 'a[i] = %(op)s a[i]'

            create_and_test(template % dict(op=op, rval='1'))
            create_and_test(template % dict(op=op, rval='a[i]'))
        for func in func_list:
            template = 'a[i] = %(func)s(a[i], %(rval)s)' \
                if func not in unary_funcs else 'a[i] = %(func)s(a[i])'
            create_and_test(template % dict(func=func, rval='1'))
            create_and_test(template % dict(func=func, rval='a[i]'))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' is not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])


def test_check_for_variable_access_ordering():
    """Expect ``VariableAccessNotOrdered`` for two unordered writes to ``a``.

    The kernel writes ``a[i]`` and ``a[i+1]`` in two instructions that
    declare no dependency on each other; scheduling the preprocessed
    kernel is expected to raise.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    instructions = """
            a[i] = 12
            a[i+1] = 13
            """
    preprocessed = lp.preprocess_kernel(
        lp.make_kernel("{[i]: 0<=i<n}", instructions))

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)


def test_check_for_variable_access_ordering_with_aliasing():
    """Expect ``VariableAccessNotOrdered`` for writes to aliased temporaries.

    ``a`` and ``b`` are distinct temporaries declared with the same
    ``base_storage`` name; the two writes declare no dependency on each
    other, and scheduling the preprocessed kernel is expected to raise.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    # both temporaries are declared with base_storage "tmp"
    aliased_temporaries = [
        lp.TemporaryVariable(name, shape="n+1", base_storage="tmp")
        for name in ("a", "b")
    ]
    instructions = """
            a[i] = 12
            b[i+1] = 13
            """

    preprocessed = lp.preprocess_kernel(
        lp.make_kernel("{[i]: 0<=i<n}", instructions, aliased_temporaries))

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)


@pytest.mark.parametrize(("second_index", "expect_barrier"),
        [
            ("2*i", True),
            ("2*i+1", False),
            ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check whether a barrier separates two writes to a local array.

    The first instruction writes ``a[2*i]``; the second writes
    ``a[<second_index>]`` and depends on the first.  After scheduling,
    the result of ``barrier_between`` for the two instruction ids is
    compared against *expect_barrier*.
    """
    local_array = lp.TemporaryVariable(
        "a", lp.auto, shape=(256,), scope=lp.temp_var_scope.LOCAL)
    instructions = """
            a[2*i] = 12  {id=first}
            a[%s] = 13  {id=second,dep=first}
            """ % second_index

    knl = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_array])
    knl = lp.tag_inames(knl, "i:l.0")

    scheduled = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))

    has_barrier = barrier_between(scheduled, "first", "second")
    assert has_barrier == expect_barrier


def test_half_complex_conditional(ctx_factory):
    """Smoke-test an ``if`` whose branches are a real and a complex literal.

    The kernel assigns ``if(i < 5, 0, 0j)`` to ``tmp[i]``; the test only
    builds and runs it, without checking the output.
    """
    instructions = """
           tmp[i] = if(i < 5, 0, 0j)
           """

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    kernel = lp.make_kernel("{[i]: 0 <= i < 10}", instructions)
    kernel(queue)


def test_dep_cycle_printing_and_error():
    """Expect ``DependencyCycleFound`` when generating code for this kernel.

    The kernel is vectorized along ``j`` (split by 4, inner part tagged
    ``vec``) and preprocessed; code generation is then expected to raise.
    """
    # https://gitlab.tiker.net/inducer/loopy/issues/140
    # This kernel has two dep cycles.
    from loopy.diagnostic import DependencyCycleFound

    domain = '{[i,j,k]: 0 <= i,j,k < 12}'
    instructions = """
        for j
            for i
                <> nu = i - 4
                if nu > 0
                    <> P_val = a[i, j] {id=pset0}
                else
                    P_val = 0.1 * a[i, j] {id=pset1}
                end
                <> B_sum = 0
                for k
                    B_sum = B_sum + k * P_val {id=bset, dep=pset*}
                end
                # here, we are testing that Kc is properly promoted to a vector dtype
                <> Kc = P_val * B_sum {id=kset, dep=bset}
                a[i, j] = Kc {dep=kset}
            end
        end
    """
    kernel_data = [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)]

    knl = lp.make_kernel(domain, instructions, kernel_data)

    # vectorize over j and lay out 'a' so its split axis is the vector axis
    knl = lp.split_iname(knl, 'j', 4, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 1, 4)
    knl = lp.tag_array_axes(knl, 'a', 'N1,N0,vec')
    knl = lp.preprocess_kernel(knl)

    with pytest.raises(DependencyCycleFound):
        generated = lp.generate_code(knl)
        print(generated[0])


if __name__ == "__main__":
    # With no command-line argument, run this file's tests through pytest;
    # otherwise execute the first argument as Python source in this module.
    # NOTE(review): exec() runs arbitrary code from the command line --
    # fine as a developer convenience, never feed it untrusted input.
    if len(sys.argv) <= 1:
        from pytest import main
        main([__file__])
    else:
        exec(sys.argv[1])

# vim: foldmethod=marker