Newer
Older
Nick Curtis
committed
insn=insn,
extra=extra_insns if extra_insns else ''),
data
)
knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
knl = lp.split_array_axis(knl, names, 0, 4)
knl = lp.tag_array_axes(knl, names, 'N0,vec')
if v is not None:
knl = lp.set_options(knl, write_wrapper=True)
queue = cl.CommandQueue(ctx)
if check is not None:
assert check(knl)
elif exception is not None:
with pytest.raises(exception):
knl(queue, **kwargs)
else:
Nick Curtis
committed
if not isinstance(answer, tuple):
answer = (answer,)
result = knl(queue, **kwargs)[1]
for r, a in zip(result, answer):
assert np.array_equal(r.flatten('C'), a)
ans = np.zeros(12, dtype=np.int32)
ans[7:] = 1
from loopy.diagnostic import LoopyError
# 1) test a conditional on a vector iname -- currently unimplemented as it
# would require creating a 'shadow' vector iname temporary
create_and_test('a[i] = 1', 'i > 6', ans, exception=LoopyError)
create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
12, dtype=np.int32).reshape((3, 4)))
# 3) condition on a vector temporary -- this is currently broken for the
# same reason as #1
create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = i < 6',
exception=LoopyError)
# 4) condition on an assigned vector array, this should work as assignment to a
# vector can be safely unrolled
create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.zeros((3, 4), dtype=np.int32),
extra_insns='b[i] = i')
Nick Curtis
committed
# 5) a block of simple assignments, this should be seamlessly translated to
# multiple vector if statements
c_ans = np.ones(12, dtype=np.int32)
c_ans[7:] = 0
create_and_test('a[i] = 1\nc[i] = 0', 'b[i] > 6', (ans, c_ans), b=np.arange(
12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))
# 6) test a negated conditional
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.int32).reshape((3, 4)))
# 7) test conditional on differing dtype
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.int64).reshape((3, 4)))
# 8) test conditional on differing dtype (float->int) and (int->float)
Nick Curtis
committed
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.float64).reshape((3, 4)))
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
Nick Curtis
committed
12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))
# 9) test conditional on valuearg, the "test" here is that we can actually
# generate the code
create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)
Nick Curtis
committed
@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Check dtype conversion between differing vector dtypes.

    The global array ``a`` has dtype *rhs_dtype* and is vectorized, while the
    temporary ``temp`` has dtype *lhs_dtype*.  Every case below runs the
    kernel once and then checks whether ``convert_`` appears in the generated
    device code.
    """
    ctx = ctx_factory()

    def check_kernel(insn, has_conversion=True, uses_temp=True):
        """Build, run and inspect a vectorized kernel wrapped around *insn*.

        :arg insn: instruction text placed inside the ``for i`` loop.
        :arg has_conversion: whether ``convert_`` is expected to show up in
            the generated device code.
        :arg uses_temp: if false, the ``temp_to_write(temp)`` warning is
            silenced when creating the kernel.
        """
        # the vector width is used for the iname split, the array-axis split
        # and the shape of the input array, so it is defined exactly once
        vw = 4
        a_lp = lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype)
        temp_lp = lp.TemporaryVariable('temp', dtype=lhs_dtype)

        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}'],
            """
            for i
                {insn}
            end
            """.format(insn=insn),
            [a_lp, temp_lp],
            target=lp.PyOpenCLTarget(ctx.devices[0]),
            silenced_warnings=[] if uses_temp else ['temp_to_write(temp)'])

        knl = lp.split_iname(knl, 'i', vw, inner_tag='vec')
        knl = lp.split_array_axis(knl, 'a', 0, vw)
        knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

        queue = cl.CommandQueue(ctx)
        # check that the kernel compiles correctly
        knl(queue, a=np.zeros((12 // vw, vw), dtype=rhs_dtype))
        # check that we have or don't have a conversion
        device_code = lp.generate_code_v2(knl).device_code()
        assert ('convert_' in device_code) == has_conversion

    # test simple dtype conversion
    check_kernel("temp = a[i]")

    # test literal assignment
    check_kernel("a[i] = 1", has_conversion=False, uses_temp=False)

    # test that a non-vector temporary doesn't trigger conversion
    #
    # this should generate the code (e.g.,):
    # __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1)))
    # loopy_kernel(__global long4 *__restrict__ a)
    # {
    #   int temp;
    #   for (int i_outer = 0; i_outer <= 2; ++i_outer)
    #   {
    #     temp = 1;
    #     a[i_outer] = temp;
    #   }
    # }
    #
    # that is, temp should _not_ be assigned to "a" w/ convert_long4
    check_kernel("""
        temp = 1
        a[i] = temp
        """, has_conversion=False)

    # test that the inverse _does_ result in a conversion
    check_kernel("""
        temp = a[i] {id=1, dep=*}
        a[i] = temp {id=2, dep=1}
        """)
Nick Curtis
committed
def test_vectorizability():
    """Check the vectorizability conditions for a range of expressions.

    Each expression is placed in a kernel whose ``i`` iname is split by 4
    with the inner part tagged ``vec``.  The kernel is preprocessed, device
    code is generated, and the vector tags on the instruction, the first
    argument and the ``i_inner`` iname are asserted to still be present.
    """
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        """Build a vectorized kernel around *insn* and check its vector tags.

        :arg insn: instruction text placed inside the ``for i`` loop.
        :arg exception: accepted for signature compatibility; not used by
            this helper.
        :arg a: array whose dtype is used for argument ``a``; defaults to a
            ``(3, 4)`` int32 array of zeros.
        :arg b: optional array; if given, an argument ``b`` with its dtype is
            added to the kernel.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        if b is not None:
            data.append(lp.GlobalArg('b', shape=(12,), dtype=b.dtype))
        names = [d.name for d in data]

        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}'],
            """
            for i
                %(insn)s
            end
            """ % dict(insn=insn),
            data)

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        # code generation itself must succeed for the vectorized kernel
        lp.generate_code_v2(knl).device_code()

        assert knl.instructions[0].within_inames & {'i_inner'}
        assert isinstance(knl.args[0].dim_tags[-1], VectorArrayDimTag)
        assert filter_iname_tags_by_type(
            knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=()):
        """Test every operator and function with a literal and a vector operand.

        :arg op_list: operators to test.
        :arg unary_operators: the members of *op_list* taking one operand.
        :arg func_list: function names to test.
        :arg unary_funcs: the members of *func_list* taking one argument.
        """
        for op in op_list:
            if op in unary_operators:
                template = 'a[i] = %(op)s a[i]'
            else:
                template = 'a[i] = a[i] %(op)s %(rval)s'
            for rval in ('1', 'a[i]'):
                create_and_test(template % dict(op=op, rval=rval))

        for func in func_list:
            if func in unary_funcs:
                template = 'a[i] = %(func)s(a[i])'
            else:
                template = 'a[i] = %(func)s(a[i], %(rval)s)'
            for rval in ('1', 'a[i]'):
                create_and_test(template % dict(func=func, rval=rval))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])
def test_check_for_variable_access_ordering():
    """Scheduling must reject two writes to ``a`` with no stated ordering.

    The kernel writes ``a[i]`` and ``a[i+1]`` in two instructions that carry
    no dependency annotations; ``get_one_scheduled_kernel`` is expected to
    raise ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    unordered_writes = """
        a[i] = 12
        a[i+1] = 13
        """
    preprocessed = lp.preprocess_kernel(
        lp.make_kernel("{[i]: 0<=i<n}", unordered_writes))

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(preprocessed)
def test_check_for_variable_access_ordering_with_aliasing():
    """Scheduling must reject unordered writes to aliased temporaries.

    ``a`` and ``b`` are distinct temporaries declared with the same
    ``base_storage``; the two writes carry no dependency annotations and
    ``get_one_scheduled_kernel`` is expected to raise
    ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    # both temporaries are declared on top of the same base storage
    aliased_temporaries = [
        lp.TemporaryVariable(name, shape="n+1", base_storage="tmp")
        for name in ("a", "b")]

    kernel = lp.make_kernel(
        "{[i]: 0<=i<n}",
        """
        a[i] = 12
        b[i+1] = 13
        """,
        aliased_temporaries)
    kernel = lp.preprocess_kernel(kernel)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(kernel)
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
@pytest.mark.parametrize(
    ("second_index", "expect_barrier"),
    [
        ("2*i", True),
        ("2*i+1", False),
    ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check whether a barrier is scheduled between two writes to ``a``.

    The first instruction writes ``a[2*i]``; the second writes
    ``a[second_index]`` and depends on the first.  *expect_barrier* is the
    expected result of ``barrier_between`` for the two instructions.
    """
    local_array = lp.TemporaryVariable(
        "a", lp.auto, shape=(256,), scope=lp.temp_var_scope.LOCAL)

    instructions = """
        a[2*i] = 12 {id=first}
        a[%s] = 13 {id=second,dep=first}
        """ % second_index

    scheduled = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_array])
    scheduled = lp.tag_inames(scheduled, "i:l.0")
    scheduled = lp.get_one_scheduled_kernel(lp.preprocess_kernel(scheduled))

    has_barrier = barrier_between(scheduled, "first", "second")
    assert has_barrier == expect_barrier
def test_half_complex_conditional(ctx_factory):
    """Run a kernel whose ``if`` expression mixes a real and a complex branch.

    The test only requires that the kernel can be built and executed; no
    result is inspected.
    """
    queue = cl.CommandQueue(ctx_factory())

    # one branch is the real literal 0, the other the complex literal 0j
    instructions = """
        tmp[i] = if(i < 5, 0, 0j)
        """
    kernel = lp.make_kernel("{[i]: 0 <= i < 10}", instructions)

    kernel(queue)
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
def test_dep_cycle_printing_and_error():
    """Code generation must raise ``DependencyCycleFound`` for this kernel.

    See https://gitlab.tiker.net/inducer/loopy/issues/140.
    """
    from loopy.diagnostic import DependencyCycleFound

    # This kernel has two dep cycles.
    instructions = """
        for j
            for i
                <> nu = i - 4
                if nu > 0
                    <> P_val = a[i, j] {id=pset0}
                else
                    P_val = 0.1 * a[i, j] {id=pset1}
                end
                <> B_sum = 0
                for k
                    B_sum = B_sum + k * P_val {id=bset, dep=pset*}
                end
                # here, we are testing that Kc is properly promoted to a vector dtype
                <> Kc = P_val * B_sum {id=kset, dep=bset}
                a[i, j] = Kc {dep=kset}
            end
        end
        """
    kernel_args = [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)]

    kernel = lp.make_kernel(
        '{[i,j,k]: 0 <= i,j,k < 12}', instructions, kernel_args)

    # vectorize along j: split the iname and the matching axis of `a` by 4
    kernel = lp.split_iname(kernel, 'j', 4, inner_tag='vec')
    kernel = lp.split_array_axis(kernel, 'a', 1, 4)
    kernel = lp.tag_array_axes(kernel, 'a', 'N1,N0,vec')
    kernel = lp.preprocess_kernel(kernel)

    with pytest.raises(DependencyCycleFound):
        print(lp.generate_code(kernel)[0])
if __name__ == "__main__":
    # With no command-line argument, hand this file to `main`; otherwise the
    # first argument is executed as Python source.
    # NOTE(review): exec() runs whatever text is passed on the command line --
    # presumably a developer convenience for invoking a single test; confirm.
    if len(sys.argv) <= 1:
        main([__file__])
    else:
        exec(sys.argv[1])