# NOTE: excerpt of test_loopy.py; web-page navigation text removed.

    knl = make_kernel('', dtype=np.float32, skeleton=skeleton, extra_inames='k')
    from loopy.kernel.array import VectorArrayDimTag
    assert any(isinstance(x, VectorArrayDimTag)
               for x in knl.temporary_variables['Kc'].dim_tags)

def test_explicit_simd_selects(ctx_factory):
    """Run conditionals inside a kernel whose inner iname is tagged ``vec``.

    Each case builds a 12-element kernel, splits ``i`` by 4 with the inner
    iname tagged ``vec``, tags the array axes ``N0,vec`` and compares the
    kernel output against a reference array.
    """
    ctx = ctx_factory()

    def create_and_test(insn, condition, answer, exception=None, a=None, b=None,
                        extra_insns=None, c=None, v=None, check=None, debug=False):
        """Build, transform and (usually) run one conditional kernel.

        :arg insn: instruction(s) placed inside the conditional.
        :arg condition: expression used as the condition of the ``if``.
        :arg answer: expected flattened output, or a tuple of expected
            outputs that is compared pairwise with the kernel outputs.
        :arg exception: if given (and *check* is not), invoking the kernel
            must raise this exception type.
        :arg a: array passed as ``a``; defaults to int32 zeros of shape (3, 4).
        :arg b: optional array passed as ``b``.
        :arg extra_insns: optional instruction(s) added to the loop body.
        :arg c: optional array passed as ``c``.
        :arg v: optional value passed as the value argument ``v``.
        :arg check: optional predicate applied to the transformed kernel
            instead of running it.
        :arg debug: if true, print the generated device code before running.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        kwargs = dict(a=a)
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
            kwargs['b'] = b
        if c is not None:
            # declare 'c' with its own dtype; it may be passed without 'b'
            data += [lp.GlobalArg('c', shape=(12,), dtype=c.dtype)]
            kwargs['c'] = c
        names = [d.name for d in data]
        # add after defining names to avoid trying to split value arg
        if v is not None:
            data += [lp.ValueArg('v', dtype=np.int32)]
            kwargs['v'] = v

        # NOTE(review): the extra instructions are placed ahead of the
        # conditional so that a condition may refer to values they define
        # (cases 3 and 4 below rely on this) -- confirm against upstream.
        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(extra)s
                if %(condition)s
                    %(insn)s
                end
            end
            """ % dict(condition=condition,
                       insn=insn,
                       extra=extra_insns if extra_insns else ''),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        if v is not None:
            knl = lp.set_options(knl, write_wrapper=True)

        queue = cl.CommandQueue(ctx)
        if check is not None:
            assert check(knl)
        elif exception is not None:
            with pytest.raises(exception):
                knl(queue, **kwargs)
        else:
            if not isinstance(answer, tuple):
                answer = (answer,)
            if debug:
                print(lp.generate_code_v2(knl).device_code())
            result = knl(queue, **kwargs)[1]
            # distinct loop names: do not rebind the input array 'a'
            for output, expected in zip(result, answer):
                assert np.array_equal(output.flatten('C'), expected)

    ans = np.zeros(12, dtype=np.int32)
    ans[7:] = 1
    # 2) condition on a vector array
    create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))
    # 3) condition on a vector temporary
    create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = (i < 7) - 1')
    # 4) condition on an assigned vector array, this should work as assignment to a
    # vector can be safely unrolled
    create_and_test('a[i] = 1', '(b[i] > 6)', ans,
                    b=np.zeros((3, 4), dtype=np.int32),
                    extra_insns='b[i] = i')
    # 5) a block of simple assignments, this should be seamlessly translated to
    # multiple vector if statements
    c_ans = np.ones(12, dtype=np.int32)
    c_ans[7:] = 0
    create_and_test('a[i] = 1\nc[i] = 0', '(b[i] > 6)', (ans, c_ans), b=np.arange(
        12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))
    # 6) test a negated conditional
    # (invert maps 0 -> -1 and 1 -> -2, so adding 2 swaps zeros and ones)
    ans_negated = np.invert(ans) + 2
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))
    # 7) test conditional on differing dtype
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)))
    # 8) test conditional on differing dtype (float->int) and (int->float)
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.float64).reshape((3, 4)))
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))
    # 9) test conditional on valuearg, the "test" here is that we can actually
    # generate the code
    create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)


@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Check when ``convert_`` shows up in code mixing two vector dtypes.

    The global array ``a`` has *rhs_dtype* and the temporary ``temp`` has
    *lhs_dtype*.
    """
    ctx = ctx_factory()

    def check_conversion(insn, has_conversion=True, uses_temp=True):
        """Build and run a kernel around *insn*, then inspect its device code.

        :arg insn: instruction(s) forming the loop body.
        :arg has_conversion: whether ``convert_`` is expected in the code.
        :arg uses_temp: if false, the ``temp_to_write(temp)`` warning is
            silenced.
        """
        vec_width = 4
        kernel_args = [
            lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype),
            lp.TemporaryVariable('temp', dtype=lhs_dtype),
        ]
        silenced = [] if uses_temp else ['temp_to_write(temp)']
        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
                """
                for i
                    {insn}
                end
                """.format(insn=insn),
                kernel_args,
                target=lp.PyOpenCLTarget(ctx.devices[0]),
                silenced_warnings=silenced)
        knl = lp.split_iname(knl, 'i', vec_width, inner_tag='vec')
        knl = lp.split_array_axis(knl, 'a', 0, vec_width)
        knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

        # running the kernel verifies that the generated code compiles
        host_a = np.zeros((12,), dtype=rhs_dtype).reshape((3, 4))
        knl(cl.CommandQueue(ctx), a=host_a)

        # a conversion must be present exactly when one is expected
        device_code = lp.generate_code_v2(knl).device_code()
        found_conversion = 'convert_' in device_code
        assert found_conversion == has_conversion

    # plain assignment between the two dtypes
    check_conversion("temp = a[i]")

    # assigning a literal
    check_conversion("a[i] = 1", False, False)

    # A non-vector temporary must not trigger a conversion.
    #
    # The generated code should look like (e.g.):
    #   __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1)))
    #   loopy_kernel(__global long4 *__restrict__ a)
    #   {
    #      int temp;
    #      for (int i_outer = 0; i_outer <= 2; ++i_outer)
    #      {
    #        temp = 1;
    #        a[i_outer] = temp;
    #      }
    #    }
    #
    # i.e. temp should _not_ be assigned to "a" via convert_long4
    check_conversion("""
                      temp = 1
                      a[i] = temp
                  """, has_conversion=False)

    # the inverse case _does_ result in a conversion
    check_conversion("""
                      temp = a[i] {id=1, dep=*}
                      a[i] = temp {id=2, dep=1}
                  """)


@pytest.mark.parametrize(('dtype'), [np.int32, np.int64, np.float32, np.float64])
def test_explicit_simd_vector_iname_in_conditional(ctx_factory, dtype):
    """Check conditionals that involve the iname tagged ``vec``.

    ``b`` is filled with consecutive numbers and copied into ``a`` only
    where the condition holds; the result is compared to a reference.
    """
    # the helper below needs a context to create its command queue
    ctx = ctx_factory()

    def create_and_test(insn, answer, shape=(1, 12), debug=False,
                        vectors=None):
        """Build a kernel from *insn*, run it and compare ``a`` to *answer*.

        :arg insn: the kernel instructions.
        :arg answer: expected contents of the first output, flattened.
        :arg shape: shape of ``a`` and ``b`` before the axis split; its first
            entry also bounds the iname ``j``.
        :arg debug: if true, print the generated device code.
        :arg vectors: names of the arrays whose axes get tagged ``N1,N0,vec``;
            defaults to ``['a', 'b']``.
        """
        # avoid a mutable default argument shared between calls
        vectors = ['a', 'b'] if vectors is None else vectors
        num_conditions = shape[0]
        knl = lp.make_kernel(['{[i]: 0 <= i < 12}',
                              '{{[j]: 0 <= j < {}}}'.format(num_conditions)],
                             insn,
                             [lp.GlobalArg('a', shape=shape, dtype=dtype),
                              lp.GlobalArg('b', shape=shape, dtype=dtype)])

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.tag_inames(knl, [('j', 'g.0')])
        knl = lp.split_array_axis(knl, ['a', 'b'], 1, 4)
        knl = lp.tag_array_axes(knl, vectors, 'N1,N0,vec')

        # ensure we can generate code
        code = lp.generate_code_v2(knl).device_code()
        if debug:
            print(code)
        # and check answer
        queue = cl.CommandQueue(ctx)

        num_vectors = shape[1] // 4
        a = np.zeros((num_conditions, num_vectors, 4), dtype=dtype)
        b = np.arange(num_conditions * num_vectors * 4, dtype=dtype).reshape(
            (num_conditions, num_vectors, 4))
        result = knl(queue, a=a, b=b)[1][0]

        assert np.array_equal(result.flatten('C'), answer)

    ans = np.arange(12, dtype=np.int32)
    ans[:7] = 0
    create_and_test("""
        if i >= 7
            a[j, i] = b[j, i]
        end
    """, ans)

    # a case that will result in an unvectorized evaluation
    # this tests that we are properly able to unwind any vectorized conditional that
    # has been applied, and then reapply the correct scalar conditional in
    # unvectorize
    ans = np.arange(144, dtype=np.int32)
    ans[:7] = 0
    create_and_test("""
        if j * 12 + i >= 7
            a[j, i] = b[j, i]
        end
    """, ans, shape=(12, 12), vectors=['b'])


def test_vectorizability():
    """Check that various operators and functions keep the ``vec`` tagging.

    For each generated instruction the kernel is preprocessed, code is
    generated, and the instruction, the first argument and the iname
    ``i_inner`` are checked for their vector-related tags.
    """
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        """Build a kernel around *insn* and assert it is still vectorized.

        :arg insn: instruction placed inside the loop over ``i``.
        :arg exception: currently unused.
        :arg a: array whose dtype is used for ``a``; defaults to int32 zeros
            of shape (3, 4).
        :arg b: optional array whose dtype is used for an extra argument ``b``.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
        names = [d.name for d in data]

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(insn)s
            end
            """ % dict(insn=insn),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        # code generation itself must succeed
        lp.generate_code_v2(knl).device_code()
        assert 'i_inner' in knl.instructions[0].within_inames
        assert isinstance(knl.args[0].dim_tags[-1], VectorArrayDimTag)
        assert filter_iname_tags_by_type(knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=(),
            rvals=('1', 'a[i]')):
        """Run ``create_and_test`` for every operator/function and rvalue.

        :arg op_list: operators to test.
        :arg unary_operators: members of *op_list* applied to ``a[i]`` alone.
        :arg func_list: function names to test.
        :arg unary_funcs: members of *func_list* called with ``a[i]`` only.
        :arg rvals: right-hand operands / second arguments to try.
        """
        for op in op_list:
            template = 'a[i] = a[i] %(op)s %(rval)s' \
                if op not in unary_operators else 'a[i] = %(op)s a[i]'
            for rval in rvals:
                create_and_test(template % dict(op=op, rval=rval))
        for func in func_list:
            template = 'a[i] = %(func)s(a[i], %(rval)s)' \
                if func not in unary_funcs else 'a[i] = %(func)s(a[i])'
            for rval in rvals:
                create_and_test(template % dict(func=func, rval=rval))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' is not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])
    # 5) remainders and floor division (use 4 instead of 1 to avoid pymbolic
    #    optimizing out the a[i] % 1)
    run(['%', '//'], rvals=['a[i]', '4'])


def test_check_for_variable_access_ordering():
    """Scheduling two writes to ``a`` (at ``i`` and ``i+1``) must be rejected.

    The instructions declare no dependency on each other, and
    ``VariableAccessNotOrdered`` is expected from the scheduler.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    instructions = """
            a[i] = 12
            a[i+1] = 13
            """
    knl = lp.make_kernel("{[i]: 0<=i<n}", instructions)
    preprocessed = lp.preprocess_kernel(knl)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)


def test_check_for_variable_access_ordering_with_aliasing():
    """Unordered writes to two temporaries sharing ``base_storage`` must fail.

    ``a`` and ``b`` are both declared with ``base_storage="tmp"``, and
    ``VariableAccessNotOrdered`` is expected from the scheduler.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    instructions = """
            a[i] = 12
            b[i+1] = 13
            """
    aliased_temps = [
        lp.TemporaryVariable(name, shape="n+1", base_storage="tmp")
        for name in ("a", "b")]
    knl = lp.make_kernel("{[i]: 0<=i<n}", instructions, aliased_temps)
    preprocessed = lp.preprocess_kernel(knl)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)


@pytest.mark.parametrize(("second_index", "expect_barrier"),
        [
            ("2*i", True),
            ("2*i+1", False),
            ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check whether a barrier separates two writes into the local array ``a``.

    The first write targets ``a[2*i]``; the second targets
    ``a[second_index]`` and depends on the first.
    """
    instructions = """
            a[2*i] = 12  {id=first}
            a[%s] = 13  {id=second,dep=first}
            """ % second_index
    local_a = lp.TemporaryVariable(
        "a", lp.auto, shape=(256,), scope=lp.AddressSpace.LOCAL)

    knl = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_a])
    knl = lp.tag_inames(knl, "i:l.0")

    scheduled = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))

    has_barrier = barrier_between(scheduled, "first", "second")
    assert has_barrier == expect_barrier


def test_half_complex_conditional(ctx_factory):
    """Run a kernel whose ``if(...)`` has a real ``0`` and a complex ``0j`` branch."""
    queue = cl.CommandQueue(ctx_factory())

    instructions = """
           tmp[i] = if(i < 5, 0, 0j)
           """
    knl = lp.make_kernel("{[i]: 0 <= i < 10}", instructions)

    # the test passes as long as invoking the kernel does not raise
    knl(queue)


def test_dep_cycle_printing_and_error():
    """Code generation for this kernel must raise ``DependencyCycleFound``."""
    # https://gitlab.tiker.net/inducer/loopy/issues/140
    # This kernel has two dep cycles.
    from loopy.diagnostic import DependencyCycleFound

    instructions = """
        for j
            for i
                <> nu = i - 4
                if nu > 0
                    <> P_val = a[i, j] {id=pset0}
                else
                    P_val = 0.1 * a[i, j] {id=pset1}
                end
                <> B_sum = 0
                for k
                    B_sum = B_sum + k * P_val {id=bset, dep=pset*}
                end
                # here, we are testing that Kc is properly promoted to a vector dtype
                <> Kc = P_val * B_sum {id=kset, dep=bset}
                a[i, j] = Kc {dep=kset}
            end
        end
    """
    kernel_args = [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)]
    knl = lp.make_kernel('{[i,j,k]: 0 <= i,j,k < 12}', instructions, kernel_args)

    # vectorize over j: split the iname and the matching axis of 'a'
    knl = lp.split_iname(knl, 'j', 4, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 1, 4)
    knl = lp.tag_array_axes(knl, 'a', 'N1,N0,vec')
    knl = lp.preprocess_kernel(knl)

    with pytest.raises(DependencyCycleFound):
        generated = lp.generate_code(knl)[0]
        print(generated)


# Script entry point: run the statement given on the command line, or the
# whole test file through pytest when no argument is supplied.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # NOTE(review): this executes the first command-line argument as
        # Python code; only invoke the script with trusted input.
        exec(sys.argv[1])
    else:
        from pytest import main
        main([__file__])

# vim: foldmethod=marker