Newer
Older
knl = make_kernel('', dtype=np.float32, skeleton=skeleton, extra_inames='k')
from loopy.kernel.array import VectorArrayDimTag
assert any(isinstance(x, VectorArrayDimTag)
for x in knl.temporary_variables['Kc'].dim_tags)
Nick Curtis
committed
def test_explicit_simd_selects(ctx_factory):
    """Exercise ``if`` blocks inside explicitly vectorized kernels.

    Every case builds a kernel over ``0 <= i < 12``, splits ``i`` by 4 with the
    inner iname tagged ``vec``, splits and vector-tags the array arguments,
    and compares the arrays returned by the kernel with the expected values.
    """
    ctx = ctx_factory()

    def create_and_test(insn, condition, answer, exception=None, a=None, b=None,
                        extra_insns=None, c=None, v=None, check=None,
                        debug=False):
        """Build, transform and run one conditional kernel.

        :arg insn: instruction text placed inside the ``if`` block.
        :arg condition: text of the ``if`` condition.
        :arg answer: expected flattened output array, or a tuple of them
            (compared pairwise with the arrays the kernel returns).
        :arg exception: if given, invoking the kernel must raise it.
        :arg a: array bound to ``a``; defaults to int32 zeros of shape (3, 4).
        :arg b: optional array bound to ``b``.
        :arg extra_insns: instruction text emitted before the ``if`` block.
        :arg c: optional array bound to ``c``.
        :arg v: optional value bound to the scalar argument ``v``.
        :arg check: optional predicate on the transformed kernel; when given
            it is asserted and the kernel is not executed.
        :arg debug: print the generated device code before executing.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        kwargs = dict(a=a)
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
            # forward the array to the kernel call as well
            kwargs['b'] = b
        if c is not None:
            # use c's own dtype (the original read b.dtype here, which fails
            # when only c is supplied)
            data += [lp.GlobalArg('c', shape=(12,), dtype=c.dtype)]
            kwargs['c'] = c
        # only array arguments are split / vector-tagged below
        names = [d.name for d in data]
        # add after defining names to avoid trying to split value arg
        if v is not None:
            # NOTE(review): declaration reconstructed; int32 dtype assumed --
            # confirm against the upstream test.
            data += [lp.ValueArg('v', dtype=np.int32)]
            kwargs['v'] = v

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(extra)s
                if %(condition)s
                    %(insn)s
                end
            end
            """ % dict(condition=condition,
                       insn=insn,
                       extra=extra_insns if extra_insns else ''),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        if v is not None:
            knl = lp.set_options(knl, write_wrapper=True)

        queue = cl.CommandQueue(ctx)
        if check is not None:
            assert check(knl)
        elif exception is not None:
            with pytest.raises(exception):
                knl(queue, **kwargs)
        else:
            if not isinstance(answer, tuple):
                answer = (answer,)
            if debug:
                print(lp.generate_code_v2(knl).device_code())
            # element [1] of the call result holds the output arrays
            result = knl(queue, **kwargs)[1]
            for computed, expected in zip(result, answer):
                assert np.array_equal(computed.flatten('C'), expected)

    ans = np.zeros(12, dtype=np.int32)
    ans[7:] = 1
    # element-wise complement of ans (0 <-> 1): ~0 + 2 == 1, ~1 + 2 == 0
    ans_negated = np.invert(ans) + 2

    # 1) test a conditional on a vector iname
    create_and_test('a[i] = 1', 'i > 6', ans)

    # 2) condition on a vector array
    create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))

    # 3) condition on a vector temporary
    create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = (i < 7) - 1')

    # 4) condition on an assigned vector array, this should work as assignment to a
    # vector can be safely unrolled
    # NOTE(review): the tail of this call was missing from the source; the
    # extra instruction below is reconstructed so that b (passed as zeros)
    # yields the expected answer -- confirm against the upstream test.
    create_and_test('a[i] = 1', '(b[i] > 6)', ans,
                    b=np.zeros((3, 4), dtype=np.int32),
                    extra_insns='b[i] = i')

    # 5) a block of simple assignments, this should be seamlessly translated to
    # multiple vector if statements
    c_ans = np.ones(12, dtype=np.int32)
    c_ans[7:] = 0
    create_and_test('a[i] = 1\nc[i] = 0', '(b[i] > 6)', (ans, c_ans), b=np.arange(
        12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))

    # 6) test a negated conditional
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))

    # 7) test conditional on differing dtype
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)))

    # 8) test conditional on differing dtype (float->int) and (int->float)
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.float64).reshape((3, 4)))
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))

    # 9) test conditional on valuearg, the "test" here is that we can actually
    # generate the code
    create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)
Nick Curtis
committed
@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Check when ``convert_`` calls appear between differing vector dtypes.

    ``a`` is a vector-tagged global array of *rhs_dtype* and ``temp`` a
    temporary of *lhs_dtype*; each case runs the kernel and then looks for
    the substring ``convert_`` in the generated device code.
    """
    ctx = ctx_factory()

    # test that dtype conversion happens correctly between differing vector-dtypes

    def __make_kernel(insn, has_conversion=True, uses_temp=True):
        """Build and run a kernel around *insn* and check for a conversion.

        :arg insn: instruction text placed inside the ``for i`` loop.
        :arg has_conversion: whether ``convert_`` must occur in the device code.
        :arg uses_temp: when false, the ``temp_to_write(temp)`` warning is
            silenced.
        """
        vw = 4
        a_lp = lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype)
        temp_lp = lp.TemporaryVariable('temp', dtype=lhs_dtype)

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                {insn}
            end
            """.format(insn=insn),
            [a_lp, temp_lp],
            target=lp.PyOpenCLTarget(ctx.devices[0]),
            silenced_warnings=['temp_to_write(temp)'] if not uses_temp else [])

        # one width (vw) drives both the iname split and the array split
        knl = lp.split_iname(knl, 'i', vw, inner_tag='vec')
        knl = lp.split_array_axis(knl, 'a', 0, vw)
        knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

        queue = cl.CommandQueue(ctx)
        # check that the kernel compiles correctly
        knl(queue, a=np.zeros((12,), dtype=rhs_dtype).reshape((-1, vw)))
        # check that we have or don't have a conversion
        device_code = lp.generate_code_v2(knl).device_code()
        assert ('convert_' in device_code) == has_conversion

    # test simple dtype conversion
    __make_kernel("temp = a[i]")

    # test literal assignment
    __make_kernel("a[i] = 1", False, False)

    # test that a non-vector temporary doesn't trigger conversion
    #
    # this should generate the code (e.g.,):
    # __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1)))
    # loopy_kernel(__global long4 *__restrict__ a)
    # {
    #   int temp;
    #   for (int i_outer = 0; i_outer <= 2; ++i_outer)
    #   {
    #     temp = 1;
    #     a[i_outer] = temp;
    #   }
    # }
    #
    # that is, temp should _not_ be assigned to "a" w/ convert_long4
    __make_kernel("""
    temp = 1
    a[i] = temp
    """, has_conversion=False)

    # test that the inverse _does_ result in a conversion
    __make_kernel("""
    temp = a[i] {id=1, dep=*}
    a[i] = temp {id=2, dep=1}
    """)
Nick Curtis
committed
@pytest.mark.parametrize('dtype', [np.int32, np.int64, np.float32, np.float64])
@pytest.mark.parametrize('vec_width', [2, 3, 4, 8, 16])
def test_explicit_simd_vector_iname_in_conditional(ctx_factory, dtype, vec_width):
    """Check conditionals that reference the vectorized iname itself.

    Two kernels copy ``b`` into ``a`` under a condition involving ``i``; the
    first keeps both arrays vector-tagged, the second tags only ``b``.
    """
    ctx = ctx_factory()

    size = vec_width * 4

    def create_and_test(insn, answer, shape=(1, size), debug=False,
                        vectors=None):
        """Build, transform and run one kernel, then compare with *answer*.

        :arg insn: full instruction text of the kernel.
        :arg answer: expected flattened contents of the first returned array.
        :arg shape: shape of ``a`` and ``b`` before the axis split;
            ``shape[0]`` is also the extent of the ``j`` domain.
        :arg debug: print the generated device code.
        :arg vectors: names of the arrays to tag ``'N1,N0,vec'``; defaults
            to ``['a', 'b']``.
        """
        # build the default per call rather than sharing one list object
        if vectors is None:
            vectors = ['a', 'b']
        num_conditions = shape[0]

        knl = lp.make_kernel(['{{[i]: 0 <= i < {}}}'.format(size),
                              '{{[j]: 0 <= j < {}}}'.format(num_conditions)],
                             insn,
                             [lp.GlobalArg('a', shape=shape, dtype=dtype),
                              lp.GlobalArg('b', shape=shape, dtype=dtype)])

        # NOTE(review): the split width is the literal 4; ``vec_width`` only
        # scales ``size`` -- confirm whether it was meant to be used here.
        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.tag_inames(knl, [('j', 'g.0')])
        knl = lp.split_array_axis(knl, ['a', 'b'], 1, 4)
        knl = lp.tag_array_axes(knl, vectors, 'N1,N0,vec')

        # ensure we can generate code
        code = lp.generate_code_v2(knl).device_code()
        if debug:
            print(code)

        # and check answer
        queue = cl.CommandQueue(ctx)
        num_vectors = shape[1] // 4
        a = np.zeros((num_conditions, num_vectors, 4), dtype=dtype)
        b = np.arange(num_conditions * num_vectors * 4, dtype=dtype).reshape(
            (num_conditions, num_vectors, 4))
        result = knl(queue, a=a, b=b)[1][0]
        assert np.array_equal(result.flatten('C'), answer)

    ans = np.arange(size, dtype=np.int32)
    ans[:7] = 0
    create_and_test("""
        if i >= 7
            a[j, i] = b[j, i]
        end
    """, ans)

    # a case that will result in a unvectorized evaluation
    # this tests that we are properly able to unwind any vectorized conditional that
    # has been applied, and then reapply the correct scalar conditional in
    # unvectorize
    ans = np.arange(12 * size, dtype=np.int32)
    ans[:7] = 0
    create_and_test("""
        if j * 12 + i >= 7
            a[j, i] = b[j, i]
        end
    """, ans, shape=(12, size), vectors=['b'])
Nick Curtis
committed
Nick Curtis
committed
def test_vectorizability():
    """Check that a range of expression kinds survives vectorization.

    For each generated instruction the kernel is preprocessed and code is
    generated; afterwards the first instruction must still sit inside
    ``i_inner``, the first argument's last axis must carry a
    ``VectorArrayDimTag`` and ``i_inner`` must carry a ``VectorizeTag``.
    """
    # check new vectorizability conditions
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        """Vectorize a one-instruction kernel and assert it stayed vectorized.

        :arg insn: instruction text placed inside the ``for i`` loop.
        :arg exception: currently unused.
        :arg a: array used for ``a``'s dtype; defaults to int32 zeros (3, 4).
        :arg b: optional array; when given, ``b`` becomes a kernel argument.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        kwargs = dict(a=a)
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
            kwargs['b'] = b
        names = [d.name for d in data]

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(insn)s
            end
            """ % dict(insn=insn),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        # code generation must succeed; the output itself is not inspected
        lp.generate_code_v2(knl).device_code()

        assert 'i_inner' in knl.instructions[0].within_inames
        # NOTE(review): the original repeated this exact assertion twice;
        # possibly a second argument was meant to be checked -- confirm.
        assert isinstance(knl.args[0].dim_tags[-1], VectorArrayDimTag)
        assert filter_iname_tags_by_type(knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=(),
            rvals=('1', 'a[i]')):
        """Generate and test one instruction per operator/function and rvalue.

        :arg op_list: operators to test as ``a[i] = a[i] <op> <rval>``.
        :arg unary_operators: members of *op_list* tested as
            ``a[i] = <op> a[i]`` instead.
        :arg func_list: functions to test as ``a[i] = <func>(a[i], <rval>)``.
        :arg unary_funcs: members of *func_list* tested as
            ``a[i] = <func>(a[i])`` instead.
        :arg rvals: right-hand operand texts to iterate over.
        """
        for op in op_list:
            if op in unary_operators:
                template = 'a[i] = %(op)s a[i]'
            else:
                template = 'a[i] = a[i] %(op)s %(rval)s'
            for rval in rvals:
                create_and_test(template % dict(op=op, rval=rval))

        for func in func_list:
            if func in unary_funcs:
                template = 'a[i] = %(func)s(a[i])'
            else:
                template = 'a[i] = %(func)s(a[i], %(rval)s)'
            for rval in rvals:
                create_and_test(template % dict(func=func, rval=rval))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])

    # 5) remainders and floor division (use 4 instead of 1 to avoid pymbolic
    # optimizing out the a[i] % 1)
    run(['%', '//'], rvals=['a[i]', '4'])
def test_check_for_variable_access_ordering():
    """Scheduling two unordered writes to ``a`` must be rejected.

    The kernel writes ``a[i]`` and ``a[i+1]`` with no dependency declared
    between the two instructions; ``get_one_scheduled_kernel`` is expected
    to raise ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    unordered_writes = lp.make_kernel(
        "{[i]: 0<=i<n}",
        """
        a[i] = 12
        a[i+1] = 13
        """)
    preprocessed = lp.preprocess_kernel(unordered_writes)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)
def test_check_for_variable_access_ordering_with_aliasing():
    """Unordered writes to two temporaries sharing storage must be rejected.

    ``a`` and ``b`` are distinct temporaries declared with the same
    ``base_storage``; writing ``a[i]`` and ``b[i+1]`` without a declared
    ordering is expected to raise ``VariableAccessNotOrdered`` when the
    kernel is scheduled.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    # both temporaries are backed by the storage named "tmp"
    aliased_temps = [
        lp.TemporaryVariable(name, shape="n+1", base_storage="tmp")
        for name in ("a", "b")
    ]
    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        """
        a[i] = 12
        b[i+1] = 13
        """,
        aliased_temps)
    preprocessed = lp.preprocess_kernel(knl)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)
@pytest.mark.parametrize(("second_index", "expect_barrier"),
        [
            ("2*i", True),
            ("2*i+1", False),
        ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check barrier insertion between two dependent writes to a local array.

    The first instruction writes ``a[2*i]``; the second writes
    ``a[<second_index>]`` and depends on the first.  With ``i`` tagged
    ``l.0``, a barrier between them is expected for ``2*i`` and not for
    ``2*i+1``.
    """
    local_array = lp.TemporaryVariable("a", lp.auto, shape=(256,),
                                       scope=lp.AddressSpace.LOCAL)
    instructions = """
        a[2*i] = 12 {id=first}
        a[%s] = 13 {id=second,dep=first}
        """ % second_index

    knl = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_array])
    knl = lp.tag_inames(knl, "i:l.0")
    scheduled = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))

    has_barrier = barrier_between(scheduled, "first", "second")
    assert has_barrier == expect_barrier
def test_half_complex_conditional(ctx_factory):
    """Run a kernel whose ``if(...)`` expression has an integer and a complex
    branch (``0`` and ``0j``); the test passes if execution completes."""
    queue = cl.CommandQueue(ctx_factory())

    mixed_branch_knl = lp.make_kernel(
        "{[i]: 0 <= i < 10}",
        """
        tmp[i] = if(i < 5, 0, 0j)
        """)

    mixed_branch_knl(queue)
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
def test_dep_cycle_printing_and_error():
    """Code generation for a kernel with dependency cycles must raise
    ``DependencyCycleFound``.

    See https://gitlab.tiker.net/inducer/loopy/issues/140
    """
    # This kernel has two dep cycles.
    instructions = """
        for j
            for i
                <> nu = i - 4
                if nu > 0
                    <> P_val = a[i, j] {id=pset0}
                else
                    P_val = 0.1 * a[i, j] {id=pset1}
                end
                <> B_sum = 0
                for k
                    B_sum = B_sum + k * P_val {id=bset, dep=pset*}
                end
                # here, we are testing that Kc is properly promoted to a vector dtype
                <> Kc = P_val * B_sum {id=kset, dep=bset}
                a[i, j] = Kc {dep=kset}
            end
        end
        """
    kernel_args = [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)]
    knl = lp.make_kernel('{[i,j,k]: 0 <= i,j,k < 12}', instructions, kernel_args)

    # vectorize along j: split the iname and the matching axis of ``a``
    knl = lp.split_iname(knl, 'j', 4, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 1, 4)
    knl = lp.tag_array_axes(knl, 'a', 'N1,N0,vec')
    knl = lp.preprocess_kernel(knl)

    from loopy.diagnostic import DependencyCycleFound
    with pytest.raises(DependencyCycleFound):
        generated = lp.generate_code(knl)
        print(generated[0])
if __name__ == "__main__":
    # With a command-line argument, execute it as Python source (e.g. a call
    # to a single test function); with none, hand this file to ``main``.
    cli_args = sys.argv[1:]
    if cli_args:
        exec(cli_args[0])
    else:
        main([__file__])