Newer
Older
<> B_sum = 0 {id=bset0}
for k
B_sum = B_sum + k * a[i, j] {id=bset1, dep=*:bset0}
end
# here, we are testing that Kc is properly promoted to a vector dtype
<> P_sum = P_val * i {id=pset2, dep=pset0:pset1}
B_sum = exp(B_sum) {id=bset2, dep=bset0:bset1}
<> Kc = P_sum * B_sum {id=kset, dep=bset*:pset2}
a[i, j] = Kc {dep=*:kset, nosync=pset0:pset1}
end
end
"""
knl = make_kernel('', dtype=np.float32, skeleton=skeleton, extra_inames='k')
from loopy.kernel.array import VectorArrayDimTag
assert any(isinstance(x, VectorArrayDimTag)
for x in knl.temporary_variables['Kc'].dim_tags)
Nick Curtis
committed
def test_explicit_simd_selects(ctx_factory):
    """Exercise conditionals inside explicitly vectorized kernels.

    Each case builds a 12-element kernel whose iname ``i`` is split by 4 with
    the inner part tagged ``vec``, wraps one or more instructions in an
    ``if``, and then either compares the computed arrays with a reference
    answer or expects a given exception.
    """
    ctx = ctx_factory()

    def create_and_test(insn, condition, answer, exception=None, a=None, b=None,
                        c=None, v=None, check=None, extra_insns=None):
        """Build, vectorize and run one conditional kernel.

        :arg insn: instruction(s) placed inside the ``if`` block.
        :arg condition: the condition of the ``if`` block.
        :arg answer: expected output array, or a tuple of them (one per
            kernel output, in order).
        :arg exception: if given, running the kernel must raise it.
        :arg a: host array for ``a``; defaults to int32 zeros of shape (3, 4).
        :arg b: optional host array for ``b``.
        :arg c: optional host array for ``c``.
        :arg v: optional value passed as the kernel argument ``v``.
        :arg check: optional callable applied to the kernel; when given, its
            result is asserted and the kernel is not run.
        :arg extra_insns: optional instructions placed before the ``if``.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        kwargs = dict(a=a)
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
            kwargs['b'] = b
        if c is not None:
            # declare c with its own dtype (was b.dtype, which is wrong and
            # fails outright when c is passed without b)
            data += [lp.GlobalArg('c', shape=(12,), dtype=c.dtype)]
            kwargs['c'] = c

        names = [d.name for d in data]
        # add after defining names to avoid trying to split value arg
        if v is not None:
            # NOTE(review): the value-arg declaration was reconstructed;
            # int32 is an assumption -- confirm against the upstream test.
            data += [lp.ValueArg('v', dtype=np.int32)]
            kwargs['v'] = v

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(extra)s
                if %(condition)s
                    %(insn)s
                end
            end
            """ % dict(condition=condition,
                       insn=insn,
                       extra=extra_insns if extra_insns else ''),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        if v is not None:
            knl = lp.set_options(knl, write_wrapper=True)

        queue = cl.CommandQueue(ctx)
        if check is not None:
            assert check(knl)
        elif exception is not None:
            with pytest.raises(exception):
                knl(queue, **kwargs)
        else:
            if not isinstance(answer, tuple):
                answer = (answer,)
            result = knl(queue, **kwargs)[1]
            # distinct loop names so the host array `a` is not shadowed
            for computed, expected in zip(result, answer):
                assert np.array_equal(computed.flatten('C'), expected)

    ans = np.zeros(12, dtype=np.int32)
    ans[7:] = 1
    # element-wise complement of `ans`, used by the negated-condition cases
    ans_negated = np.invert(ans) + 2

    from loopy.diagnostic import LoopyError

    # 1) test a conditional on a vector iname -- currently unimplemented as it
    # would require creating a 'shadow' vector iname temporary
    create_and_test('a[i] = 1', 'i > 6', ans, exception=LoopyError)

    # 2) condition on a vector array
    create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))

    # 3) condition on a vector temporary -- this is currently broken for the
    # same reason as #1
    create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = i < 6',
                    exception=LoopyError)

    # 4) condition on an assigned vector array, this should work as assignment
    # to a vector can be safely unrolled
    create_and_test('a[i] = 1', 'b[i] > 6', ans,
                    b=np.zeros((3, 4), dtype=np.int32),
                    extra_insns='b[i] = i')

    # 5) a block of simple assignments, this should be seamlessly translated to
    # multiple vector if statements
    c_ans = np.ones(12, dtype=np.int32)
    c_ans[7:] = 0
    create_and_test('a[i] = 1\nc[i] = 0', 'b[i] > 6', (ans, c_ans), b=np.arange(
        12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))

    # 6) test a negated conditional
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int32).reshape((3, 4)))

    # 7) test conditional on differing dtype
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)))

    # 8) test conditional on differing dtype (float->int) and (int->float)
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.float64).reshape((3, 4)))
    create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
        12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))

    # 9) test conditional on valuearg, the "test" here is that we can actually
    # generate the code
    create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)
Nick Curtis
committed
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Run a vectorized copy between variables of differing dtypes.

    The global array ``a`` (dtype ``rhs_dtype``) is assigned to the temporary
    ``temp`` (dtype ``lhs_dtype``) inside a loop whose inner iname is tagged
    ``vec``. Nothing is asserted on the output; the kernel only has to build
    and execute.
    """
    ctx = ctx_factory()

    # test that dtype conversion happens correctly between differing
    # vector-dtypes
    vec_width = 4
    instructions = """
        for i
            temp = a[i]
        end
        """
    kernel_data = [
        lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype),
        lp.TemporaryVariable('temp', dtype=lhs_dtype),
    ]

    knl = lp.make_kernel(['{[i]: 0 <= i < 12}'], instructions, kernel_data)
    knl = lp.split_iname(knl, 'i', vec_width, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 0, vec_width)
    knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

    queue = cl.CommandQueue(ctx)
    host_a = np.zeros((12,), dtype=rhs_dtype).reshape((3, 4))
    knl(queue, a=host_a)
Nick Curtis
committed
def test_vectorizability():
    """Check that a range of expressions stay vectorized after preprocessing.

    For each operator or function, a one-instruction kernel over a
    vector-tagged array is preprocessed and code is generated for it. The
    instruction must still be nested in ``i_inner``, every argument's last
    axis must still carry a vector dim tag, and ``i_inner`` must still carry
    a vectorize tag.
    """
    # check new vectorizability conditions
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        """Build a kernel around *insn* and assert it remained vectorized.

        :arg insn: the single instruction placed in the ``i`` loop.
        :arg exception: currently unused.
        :arg a: host array for ``a``; only its dtype is used here. Defaults
            to int32 zeros of shape (3, 4).
        :arg b: optional host array for ``b``; only its dtype is used here.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        kwargs = dict(a=a)
        if b is not None:
            data += [lp.GlobalArg('b', shape=(12,), dtype=b.dtype)]
            kwargs['b'] = b
        names = [d.name for d in data]

        knl = lp.make_kernel(['{[i]: 0 <= i < 12}'],
            """
            for i
                %(insn)s
            end
            """ % dict(insn=insn),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        # code generation must succeed; the generated text is not inspected
        lp.generate_code_v2(knl).device_code()

        assert 'i_inner' in knl.instructions[0].within_inames
        # every array was tagged 'N0,vec' above, so check all of them
        # (the original repeated the same check on args[0] twice)
        assert all(isinstance(arg.dim_tags[-1], VectorArrayDimTag)
                   for arg in knl.args)
        assert filter_iname_tags_by_type(
            knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=()):
        """Run :func:`create_and_test` for each operator and function.

        Binary operators/functions are tried with a scalar (``1``) and with a
        vector (``a[i]``) right-hand operand; names listed in
        *unary_operators* / *unary_funcs* use the one-operand template.
        """
        for op in op_list:
            template = 'a[i] = a[i] %(op)s %(rval)s' \
                if op not in unary_operators else 'a[i] = %(op)s a[i]'
            create_and_test(template % dict(op=op, rval='1'))
            create_and_test(template % dict(op=op, rval='a[i]'))
        for func in func_list:
            template = 'a[i] = %(func)s(a[i], %(rval)s)' \
                if func not in unary_funcs else 'a[i] = %(func)s(a[i])'
            create_and_test(template % dict(func=func, rval='1'))
            create_and_test(template % dict(func=func, rval='a[i]'))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])
def test_check_for_variable_access_ordering():
    """Scheduling must reject two unordered writes to ``a``.

    The kernel writes ``a[i]`` and ``a[i+1]`` with no dependency declared
    between the two instructions; scheduling is expected to raise
    ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    instructions = """
        a[i] = 12
        a[i+1] = 13
        """
    knl = lp.preprocess_kernel(
        lp.make_kernel("{[i]: 0<=i<n}", instructions))

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(knl)
def test_check_for_variable_access_ordering_with_aliasing():
    """Scheduling must reject unordered writes to temporaries sharing storage.

    ``a`` and ``b`` are distinct temporaries declared with the same
    ``base_storage`` ("tmp"); the kernel writes ``a[i]`` and ``b[i+1]``
    without a declared dependency, and scheduling is expected to raise
    ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    instructions = """
        a[i] = 12
        b[i+1] = 13
        """
    aliased_temps = [
        lp.TemporaryVariable("a", shape="n+1", base_storage="tmp"),
        lp.TemporaryVariable("b", shape="n+1", base_storage="tmp"),
    ]
    knl = lp.make_kernel("{[i]: 0<=i<n}", instructions, aliased_temps)
    knl = lp.preprocess_kernel(knl)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(knl)
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
@pytest.mark.parametrize(("second_index", "expect_barrier"),
        [
            ("2*i", True),
            ("2*i+1", False),
            ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check whether a barrier is scheduled between two writes to ``a``.

    The first instruction writes ``a[2*i]``, the second writes
    ``a[<second_index>]`` and depends on the first. ``a`` is a local-scope
    temporary and ``i`` is tagged ``l.0``. The presence of a barrier between
    the two instructions must equal *expect_barrier*.
    """
    instructions = """
        a[2*i] = 12 {id=first}
        a[%s] = 13 {id=second,dep=first}
        """ % second_index
    local_a = lp.TemporaryVariable(
        "a", lp.auto, shape=(256,), scope=lp.temp_var_scope.LOCAL)

    knl = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_a])
    knl = lp.tag_inames(knl, "i:l.0")
    knl = lp.preprocess_kernel(knl)
    knl = lp.get_one_scheduled_kernel(knl)

    has_barrier = barrier_between(knl, "first", "second")
    assert has_barrier == expect_barrier
def test_half_complex_conditional(ctx_factory):
    """Build and run a kernel whose ``if`` mixes int and complex branches.

    The conditional expression selects between ``0`` and ``0j``. Nothing is
    asserted on the output; the kernel only has to build and execute.
    """
    queue = cl.CommandQueue(ctx_factory())

    instructions = """
        tmp[i] = if(i < 5, 0, 0j)
        """
    knl = lp.make_kernel("{[i]: 0 <= i < 10}", instructions)

    knl(queue)
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
def test_dep_cycle_printing_and_error():
    """Code generation must raise ``DependencyCycleFound`` for this kernel.

    Regression test for https://gitlab.tiker.net/inducer/loopy/issues/140.
    """
    from loopy.diagnostic import DependencyCycleFound

    # https://gitlab.tiker.net/inducer/loopy/issues/140
    # This kernel has two dep cycles.
    instructions = """
        for j
            for i
                <> nu = i - 4
                if nu > 0
                    <> P_val = a[i, j] {id=pset0}
                else
                    P_val = 0.1 * a[i, j] {id=pset1}
                end
                <> B_sum = 0
                for k
                    B_sum = B_sum + k * P_val {id=bset, dep=pset*}
                end
                # here, we are testing that Kc is properly promoted to a vector dtype
                <> Kc = P_val * B_sum {id=kset, dep=bset}
                a[i, j] = Kc {dep=kset}
            end
        end
        """
    kernel_data = [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)]

    knl = lp.make_kernel(
        '{[i,j,k]: 0 <= i,j,k < 12}', instructions, kernel_data)

    # vectorize along j: split the iname and the matching array axis by 4
    knl = lp.split_iname(knl, 'j', 4, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 1, 4)
    knl = lp.tag_array_axes(knl, 'a', 'N1,N0,vec')
    knl = lp.preprocess_kernel(knl)

    with pytest.raises(DependencyCycleFound):
        generated = lp.generate_code(knl)
        print(generated[0])
# Script entry point: with a command-line argument, execute it as Python
# source (e.g. a call to a single test); otherwise hand this file to the
# test runner.
# NOTE(review): exec() of a command-line string runs arbitrary code -- only
# acceptable for local, developer-driven invocation.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if cli_args:
        exec(cli_args[0])
    else:
        main([__file__])