Newer
Older
data
)
knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
knl = lp.split_array_axis(knl, names, 0, 4)
knl = lp.tag_array_axes(knl, names, 'N0,vec')
if v is not None:
knl = lp.set_options(knl, write_wrapper=True)
queue = cl.CommandQueue(ctx)
if check is not None:
assert check(knl)
elif exception is not None:
with pytest.raises(exception):
knl(queue, **kwargs)
else:
Nick Curtis
committed
if not isinstance(answer, tuple):
answer = (answer,)
Nick Curtis
committed
if debug:
print(lp.generate_code_v2(knl).device_code())
Nick Curtis
committed
result = knl(queue, **kwargs)[1]
for r, a in zip(result, answer):
assert np.array_equal(r.flatten('C'), a)
ans = np.zeros(12, dtype=np.int32)
ans[7:] = 1
Nick Curtis
committed
# 1) test a conditional on a vector iname
Nick Curtis
committed
create_and_test('a[i] = 1', 'i > 6', ans)
create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
12, dtype=np.int32).reshape((3, 4)))
Nick Curtis
committed
# 3) condition on a vector temporary
create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = (i < 7) - 1')
# 4) condition on an assigned vector array, this should work as assignment to a
# vector can be safely unrolled
Nick Curtis
committed
create_and_test('a[i] = 1', '(b[i] > 6)', ans,
b=np.zeros((3, 4), dtype=np.int32),
Nick Curtis
committed
# 5) a block of simple assignments, this should be seamlessly translated to
# multiple vector if statements
c_ans = np.ones(12, dtype=np.int32)
c_ans[7:] = 0
Nick Curtis
committed
create_and_test('a[i] = 1\nc[i] = 0', '(b[i] > 6)', (ans, c_ans), b=np.arange(
12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))
# 6) test a negated conditional
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.int32).reshape((3, 4)))
# 7) test conditional on differing dtype
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.int64).reshape((3, 4)))
# 8) test conditional on differing dtype (float->int) and (int->float)
Nick Curtis
committed
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.float64).reshape((3, 4)))
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
Nick Curtis
committed
12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))
# 9) test conditional on valuearg, the "test" here is that we can actually
# generate the code
create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)
Nick Curtis
committed
@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Test that dtype conversion happens correctly between differing
    vector dtypes.

    Every case builds a kernel over a global array ``a`` of *rhs_dtype*
    (tagged ``'N0,vec'``) and a temporary ``temp`` of *lhs_dtype*, runs it,
    and checks whether ``convert_`` shows up in the generated device code.
    """
    ctx = ctx_factory()

    def __make_kernel(insn, has_conversion=True, uses_temp=True):
        """Build, run and inspect a vectorized kernel around *insn*.

        :arg insn: instruction text placed inside the ``for i`` loop.
        :arg has_conversion: whether ``convert_`` is expected to appear in
            the generated device code.
        :arg uses_temp: when false, the ``temp_to_write(temp)`` warning is
            silenced.
        """
        # single vector width shared by the iname split, the array-axis split
        # and the shape of the input array
        vw = 4
        a_lp = lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype)
        temp_lp = lp.TemporaryVariable('temp', dtype=lhs_dtype)

        # NOTE: kernel text is kept flush-left so the string is unchanged
        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}'],
            """
for i
{insn}
end
""".format(insn=insn),
            [a_lp, temp_lp],
            target=lp.PyOpenCLTarget(ctx.devices[0]),
            silenced_warnings=['temp_to_write(temp)'] if not uses_temp else [])

        knl = lp.split_iname(knl, 'i', vw, inner_tag='vec')
        knl = lp.split_array_axis(knl, 'a', 0, vw)
        knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

        queue = cl.CommandQueue(ctx)
        # check that the kernel compiles correctly
        knl(queue, a=np.zeros((12,), dtype=rhs_dtype).reshape((12 // vw, vw)))

        # check that we have or don't have a conversion
        assert ('convert_' in lp.generate_code_v2(knl).device_code()) == \
            has_conversion

    # test simple dtype conversion
    __make_kernel("temp = a[i]")

    # test literal assignment
    __make_kernel("a[i] = 1", False, False)

    # test that a non-vector temporary doesn't trigger conversion
    #
    # this should generate the code (e.g.,):
    # __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1)))
    # loopy_kernel(__global long4 *__restrict__ a)
    # {
    #   int temp;
    #   for (int i_outer = 0; i_outer <= 2; ++i_outer)
    #   {
    #     temp = 1;
    #     a[i_outer] = temp;
    #   }
    # }
    #
    # that is, temp should _not_ be assigned to "a" w/ convert_long4
    __make_kernel("""
temp = 1
a[i] = temp
""", has_conversion=False)

    # test that the inverse _does_ result in a conversion
    __make_kernel("""
temp = a[i] {id=1, dep=*}
a[i] = temp {id=2, dep=1}
""")
Nick Curtis
committed
@pytest.mark.parametrize(('dtype'), [np.int32, np.int64, np.float32, np.float64])
def test_explicit_simd_vector_iname_in_conditional(ctx_factory, dtype):
    """Test conditionals that involve the vectorized iname ``i``.

    Each case copies ``b`` into ``a`` under an ``if`` and compares the
    flattened ``a`` with the expected answer.
    """
    ctx = ctx_factory()

    def create_and_test(insn, answer, shape=(1, 12), debug=False,
                        vectors=None):
        """Build a kernel from *insn*, run it and compare with *answer*.

        :arg insn: the kernel's instruction text.
        :arg answer: expected contents of ``a``, flattened in C order.
        :arg shape: shape of the global arrays ``a`` and ``b``; ``shape[0]``
            is also the extent of the ``j`` iname.
        :arg debug: if true, print the generated device code.
        :arg vectors: names of the arrays to tag ``'N1,N0,vec'``; defaults to
            both ``a`` and ``b``.
        """
        # a fresh list per call instead of a shared mutable default argument
        if vectors is None:
            vectors = ['a', 'b']

        num_conditions = shape[0]
        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}',
             '{{[j]: 0 <= j < {}}}'.format(num_conditions)],
            insn,
            [lp.GlobalArg('a', shape=shape, dtype=dtype),
             lp.GlobalArg('b', shape=shape, dtype=dtype)])

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.tag_inames(knl, [('j', 'g.0')])
        knl = lp.split_array_axis(knl, ['a', 'b'], 1, 4)
        knl = lp.tag_array_axes(knl, vectors, 'N1,N0,vec')

        # ensure we can generate code
        code = lp.generate_code_v2(knl).device_code()
        if debug:
            print(code)

        # and check answer
        queue = cl.CommandQueue(ctx)
        # integer division: no round trip through float
        num_vectors = shape[1] // 4
        a = np.zeros((num_conditions, num_vectors, 4), dtype=dtype)
        b = np.arange(num_conditions * num_vectors * 4, dtype=dtype).reshape(
            (num_conditions, num_vectors, 4))
        result = knl(queue, a=a, b=b)[1][0]
        assert np.array_equal(result.flatten('C'), answer)

    # NOTE: kernel text below is kept flush-left so the strings are unchanged
    ans = np.arange(12, dtype=np.int32)
    ans[:7] = 0
    create_and_test("""
if i >= 7
a[j, i] = b[j, i]
end
""", ans)

    # a case that will result in an unvectorized evaluation
    # this tests that we are properly able to unwind any vectorized conditional
    # that has been applied, and then reapply the correct scalar conditional in
    # unvectorize
    ans = np.arange(144, dtype=np.int32)
    ans[:7] = 0
    create_and_test("""
if j * 12 + i >= 7
a[j, i] = b[j, i]
end
""", ans, shape=(12, 12), vectors=['b'])
Nick Curtis
committed
def test_vectorizability():
    """Check the new vectorizability conditions.

    For a range of operators and functions, a one-instruction kernel is
    built, preprocessed and run through code generation; afterwards the
    instruction must still sit inside ``i_inner`` and both the array axis and
    the iname must still carry their vector tags.
    """
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        """Build a vectorized kernel around *insn* and check its tags.

        :arg insn: instruction text placed inside the ``for i`` loop.
        :arg exception: accepted but not used by this helper.
        :arg a: array whose dtype is used for the global argument ``a``;
            defaults to a ``(3, 4)`` ``int32`` array of zeros.
        :arg b: optional array; when given, a global argument ``b`` of its
            dtype is added.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
        names = [d.name for d in data]

        # NOTE: kernel text is kept flush-left so the string is unchanged
        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}'],
            """
for i
%(insn)s
end
""" % dict(insn=insn),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        # code generation itself must succeed; its output is not inspected
        lp.generate_code_v2(knl).device_code()

        assert 'i_inner' in knl.instructions[0].within_inames
        assert isinstance(knl.args[0].dim_tags[-1], VectorArrayDimTag)
        assert filter_iname_tags_by_type(knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=(),
            rvals=('1', 'a[i]')):
        """Run :func:`create_and_test` for each operator/function and rvalue.

        :arg op_list: operators to test.
        :arg unary_operators: members of *op_list* applied as ``<op> a[i]``
            rather than ``a[i] <op> <rval>``.
        :arg func_list: functions to test.
        :arg unary_funcs: members of *func_list* called with ``a[i]`` only.
        :arg rvals: right-hand operands / second arguments to substitute.
        """
        for op in op_list:
            if op in unary_operators:
                template = 'a[i] = %(op)s a[i]'
            else:
                template = 'a[i] = a[i] %(op)s %(rval)s'
            for rval in rvals:
                create_and_test(template % dict(op=op, rval=rval))

        for func in func_list:
            if func in unary_funcs:
                template = 'a[i] = %(func)s(a[i])'
            else:
                template = 'a[i] = %(func)s(a[i], %(rval)s)'
            for rval in rvals:
                create_and_test(template % dict(func=func, rval=rval))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' is not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])

    # 5) remainders and floor division (use 4 instead of 1 to avoid pymbolic
    #    optimizing out the a[i] % 1)
    run(['%', '//'], rvals=['a[i]', '4'])
def test_check_for_variable_access_ordering():
    """Expect ``VariableAccessNotOrdered`` when scheduling a kernel that
    writes both ``a[i]`` and ``a[i+1]``."""
    from loopy.diagnostic import VariableAccessNotOrdered

    # kernel text is kept flush-left so the string is unchanged
    instructions = """
a[i] = 12
a[i+1] = 13
"""
    kernel = lp.preprocess_kernel(
        lp.make_kernel("{[i]: 0<=i<n}", instructions))

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(kernel)
def test_check_for_variable_access_ordering_with_aliasing():
    """Expect ``VariableAccessNotOrdered`` when scheduling a kernel that
    writes ``a[i]`` and ``b[i+1]``, where the temporaries ``a`` and ``b``
    are both declared with ``base_storage="tmp"``."""
    from loopy.diagnostic import VariableAccessNotOrdered

    # two temporaries declared on the same base storage, in the order a, b
    aliased_temporaries = [
        lp.TemporaryVariable(name, shape="n+1", base_storage="tmp")
        for name in ("a", "b")
    ]

    # kernel text is kept flush-left so the string is unchanged
    instructions = """
a[i] = 12
b[i+1] = 13
"""
    kernel = lp.make_kernel("{[i]: 0<=i<n}", instructions, aliased_temporaries)
    kernel = lp.preprocess_kernel(kernel)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(kernel)
@pytest.mark.parametrize(("second_index", "expect_barrier"),
        [
            ("2*i", True),
            ("2*i+1", False),
            ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check whether a barrier is scheduled between two writes to ``a``.

    The first write goes to ``a[2*i]`` and the second to ``a[second_index]``;
    ``barrier_between`` must report *expect_barrier* for the pair.
    """
    local_array = lp.TemporaryVariable(
        "a", lp.auto, shape=(256,), scope=lp.AddressSpace.LOCAL)

    # kernel text is kept flush-left so the string is unchanged
    instructions = """
a[2*i] = 12 {id=first}
a[%s] = 13 {id=second,dep=first}
""" % second_index

    kernel = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_array])
    kernel = lp.tag_inames(kernel, "i:l.0")
    kernel = lp.preprocess_kernel(kernel)
    kernel = lp.get_one_scheduled_kernel(kernel)

    has_barrier = barrier_between(kernel, "first", "second")
    assert has_barrier == expect_barrier
def test_half_complex_conditional(ctx_factory):
    """Run a kernel whose ``if(...)`` expression has one real branch (``0``)
    and one complex branch (``0j``); the test passes if the call succeeds."""
    queue = cl.CommandQueue(ctx_factory())

    # kernel text is kept flush-left so the string is unchanged
    instructions = """
tmp[i] = if(i < 5, 0, 0j)
"""
    kernel = lp.make_kernel("{[i]: 0 <= i < 10}", instructions)

    kernel(queue)
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
def test_dep_cycle_printing_and_error():
    """Expect ``DependencyCycleFound`` when generating code for a kernel
    that contains dependency cycles.

    See https://gitlab.tiker.net/inducer/loopy/issues/140
    """
    from loopy.diagnostic import DependencyCycleFound

    # This kernel has two dep cycles.
    # (kernel text is kept flush-left so the string is unchanged)
    instructions = """
for j
for i
<> nu = i - 4
if nu > 0
<> P_val = a[i, j] {id=pset0}
else
P_val = 0.1 * a[i, j] {id=pset1}
end
<> B_sum = 0
for k
B_sum = B_sum + k * P_val {id=bset, dep=pset*}
end
# here, we are testing that Kc is properly promoted to a vector dtype
<> Kc = P_val * B_sum {id=kset, dep=bset}
a[i, j] = Kc {dep=kset}
end
end
"""
    kernel_args = [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)]
    kernel = lp.make_kernel(
        '{[i,j,k]: 0 <= i,j,k < 12}', instructions, kernel_args)

    # vectorize along j: split the iname and the matching axis of ``a``
    kernel = lp.split_iname(kernel, 'j', 4, inner_tag='vec')
    kernel = lp.split_array_axis(kernel, 'a', 1, 4)
    kernel = lp.tag_array_axes(kernel, 'a', 'N1,N0,vec')
    kernel = lp.preprocess_kernel(kernel)

    with pytest.raises(DependencyCycleFound):
        print(lp.generate_code(kernel)[0])
if __name__ == "__main__":
    # Without arguments, pass this file to ``main``. With an argument, execute
    # it as Python source -- ``exec`` runs arbitrary code, so only pass
    # trusted snippets.
    if len(sys.argv) <= 1:
        main([__file__])
    else:
        exec(sys.argv[1])