Newer
Older
data
)
knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
knl = lp.split_array_axis(knl, names, 0, 4)
knl = lp.tag_array_axes(knl, names, 'N0,vec')
if v is not None:
knl = lp.set_options(knl, write_wrapper=True)
queue = cl.CommandQueue(ctx)
if check is not None:
assert check(knl)
elif exception is not None:
with pytest.raises(exception):
knl(queue, **kwargs)
else:
Nick Curtis
committed
if not isinstance(answer, tuple):
answer = (answer,)
result = knl(queue, **kwargs)[1]
for r, a in zip(result, answer):
assert np.array_equal(r.flatten('C'), a)
ans = np.zeros(12, dtype=np.int32)
ans[7:] = 1
from loopy.diagnostic import LoopyError
# 1) test a conditional on a vector iname -- currently unimplemented as it
# would require creating a 'shadow' vector iname temporary
Nick Curtis
committed
create_and_test('a[i] = 1', 'i > 6', ans)
create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.arange(
12, dtype=np.int32).reshape((3, 4)))
# 3) condition on a vector temporary -- this is currently broken for the
# same reason as #1
create_and_test('a[i] = 1', 'c', ans, extra_insns='<> c = i < 6',
exception=LoopyError)
# 4) condition on an assigned vector array, this should work as assignment to a
# vector can be safely unrolled
create_and_test('a[i] = 1', 'b[i] > 6', ans, b=np.zeros((3, 4), dtype=np.int32),
extra_insns='b[i] = i')
Nick Curtis
committed
# 5) a block of simple assignments, this should be seamlessly translated to
# multiple vector if statements
c_ans = np.ones(12, dtype=np.int32)
c_ans[7:] = 0
create_and_test('a[i] = 1\nc[i] = 0', 'b[i] > 6', (ans, c_ans), b=np.arange(
12, dtype=np.int32).reshape((3, 4)), c=np.ones((3, 4), dtype=np.int32))
# 6) test a negated conditional
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.int32).reshape((3, 4)))
# 7) test conditional on differing dtype
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.int64).reshape((3, 4)))
# 8) test conditional on differing dtype (float->int) and (int->float)
Nick Curtis
committed
ans_negated = np.invert(ans) + 2
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
12, dtype=np.float64).reshape((3, 4)))
create_and_test('a[i] = 1', 'not (b[i] > 6)', ans_negated, b=np.arange(
Nick Curtis
committed
12, dtype=np.int64).reshape((3, 4)), a=np.zeros((3, 4), dtype=np.float32))
# 9) test conditional on valuearg, the "test" here is that we can actually
# generate the code
create_and_test('a[i] = 1', 'v', np.ones_like(ans), v=1)
Nick Curtis
committed
@pytest.mark.parametrize(('lhs_dtype', 'rhs_dtype'), [
    (np.int32, np.int64),
    (np.float32, np.float64)])
def test_explicit_vector_dtype_conversion(ctx_factory, lhs_dtype, rhs_dtype):
    """Test dtype conversion between differing vector dtypes.

    ``rhs_dtype`` is the dtype of the vectorized global array ``a`` and
    ``lhs_dtype`` is the dtype of the temporary ``temp``.  Every case runs
    the kernel once and then checks whether the generated device code
    contains a ``convert_`` call.
    """
    ctx = ctx_factory()

    def __make_kernel(insn, has_conversion=True, uses_temp=True):
        """Build, vectorize, run and inspect a kernel wrapping *insn*.

        :arg insn: instruction text placed inside a ``for i`` loop.
        :arg has_conversion: whether ``convert_`` is expected to show up in
            the generated device code.
        :arg uses_temp: if *False*, the ``temp_to_write(temp)`` warning is
            silenced when the kernel is created.
        """
        # vector width; used for the iname split, the array-axis split and
        # the shape of the host array so that the three cannot drift apart
        vw = 4
        a_lp = lp.GlobalArg('a', shape=(12,), dtype=rhs_dtype)
        temp_lp = lp.TemporaryVariable('temp', dtype=lhs_dtype)

        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}'],
            "\nfor i\n{insn}\nend\n".format(insn=insn),
            [a_lp, temp_lp],
            target=lp.PyOpenCLTarget(ctx.devices[0]),
            silenced_warnings=['temp_to_write(temp)'] if not uses_temp else [])

        knl = lp.split_iname(knl, 'i', vw, inner_tag='vec')
        knl = lp.split_array_axis(knl, 'a', 0, vw)
        knl = lp.tag_array_axes(knl, 'a', 'N0,vec')

        queue = cl.CommandQueue(ctx)
        # check that the kernel compiles correctly
        knl(queue, a=np.zeros((12,), dtype=rhs_dtype).reshape((-1, vw)))
        # check that we have or don't have a conversion
        assert ('convert_' in lp.generate_code_v2(knl).device_code()) == \
            has_conversion

    # test simple dtype conversion
    __make_kernel("temp = a[i]")

    # test literal assignment
    __make_kernel("a[i] = 1", False, False)

    # test that a non-vector temporary doesn't trigger conversion
    #
    # this should generate the code (e.g.,):
    #   __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1)))
    #   loopy_kernel(__global long4 *__restrict__ a)
    #   {
    #     int temp;
    #     for (int i_outer = 0; i_outer <= 2; ++i_outer)
    #     {
    #       temp = 1;
    #       a[i_outer] = temp;
    #     }
    #   }
    #
    # that is, temp should _not_ be assigned to "a" w/ convert_long4
    __make_kernel(
        "\n"
        "temp = 1\n"
        "a[i] = temp\n",
        has_conversion=False)

    # test that the inverse _does_ result in a conversion
    __make_kernel(
        "\n"
        "temp = a[i] {id=1, dep=*}\n"
        "a[i] = temp {id=2, dep=1}\n")
Nick Curtis
committed
@pytest.mark.parametrize('dtype', [np.int32, np.int64, np.float32, np.float64])
def test_explicit_simd_vector_iname_in_conditional(ctx_factory, dtype):
    """Run a kernel whose ``if`` condition tests the vectorized iname ``i``.

    The kernel copies ``b`` into ``a`` only where ``i >= 7``; the result is
    compared against a host-side reference for every parametrized *dtype*.
    """
    ctx = ctx_factory()

    def create_and_test(insn, answer, debug=False):
        """Build the kernel around *insn*, run it and compare with *answer*.

        :arg insn: kernel instruction text.
        :arg answer: flat array the first kernel output must equal.
        :arg debug: if true, print the generated device code.
        """
        arg_a = lp.GlobalArg('a', shape=(1, 12,), dtype=dtype)
        arg_b = lp.GlobalArg('b', shape=(1, 12,), dtype=dtype)
        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}', '{[j]: 0 <= j < 1}'],
            insn,
            [arg_a, arg_b])

        # vectorize ``i`` and map ``j`` onto the first group axis
        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.tag_inames(knl, [('j', 'g.0')])
        knl = lp.split_array_axis(knl, ['a', 'b'], 1, 4)
        knl = lp.tag_array_axes(knl, ['a', 'b'], 'N1,N0,vec')

        # ensure we can generate code
        device_code = lp.generate_code_v2(knl).device_code()
        if debug:
            print(device_code)

        # and check answer
        queue = cl.CommandQueue(ctx)
        host_a = np.zeros((1, 3, 4), dtype=dtype)
        host_b = np.arange(12, dtype=dtype).reshape((1, 3, 4))
        outputs = knl(queue, a=host_a, b=host_b)[1]
        first_output = outputs[0]
        assert np.array_equal(first_output.flatten('C'), answer)

    # reference: entries 0..6 stay zero, entries 7..11 hold their own index
    expected = np.arange(12, dtype=np.int32)
    expected[:7] = 0

    conditional_copy = (
        "\n"
        "if i >= 7\n"
        "a[j, i] = b[j, i]\n"
        "end\n")
    create_and_test(conditional_copy, expected)
Nick Curtis
committed
def test_vectorizability():
    """Check that a range of expressions remains vectorized.

    For each generated instruction the kernel is preprocessed, code is
    generated, and the test asserts that ``i_inner`` is still among the
    instruction's inames, that the arrays keep a vector dim tag on their
    last axis and that ``i_inner`` still carries a vectorize tag.
    """
    # check new vectorizability conditions
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type

    def create_and_test(insn, exception=None, a=None, b=None):
        """Build a vectorized kernel around *insn* and check its tags.

        :arg insn: instruction text placed inside a ``for i`` loop.
        :arg exception: accepted for call compatibility; currently unused.
        :arg a: optional host array supplying the dtype of ``a``
            (defaults to a ``(3, 4)`` int32 array).
        :arg b: optional host array; if given, a second global array ``b``
            with its dtype is added to the kernel.
        """
        a = np.zeros((3, 4), dtype=np.int32) if a is None else a
        data = [lp.GlobalArg('a', shape=(12,), dtype=a.dtype)]
        if b is not None:
            data.append(lp.GlobalArg('b', shape=(12,), dtype=b.dtype))
        names = [d.name for d in data]

        knl = lp.make_kernel(
            ['{[i]: 0 <= i < 12}'],
            "\nfor i\n%(insn)s\nend\n" % dict(insn=insn),
            data
            )

        knl = lp.split_iname(knl, 'i', 4, inner_tag='vec')
        knl = lp.split_array_axis(knl, names, 0, 4)
        knl = lp.tag_array_axes(knl, names, 'N0,vec')
        knl = lp.preprocess_kernel(knl)
        lp.generate_code_v2(knl).device_code()

        assert knl.instructions[0].within_inames & {'i_inner'}
        # every array that was tagged above must keep its vector axis
        # (the original asserted this twice for args[0] only)
        for arg in knl.args:
            if arg.name in names:
                assert isinstance(arg.dim_tags[-1], VectorArrayDimTag)
        assert filter_iname_tags_by_type(knl.iname_to_tags['i_inner'], VectorizeTag)

    def run(op_list=(), unary_operators=(), func_list=(), unary_funcs=(),
            rvals=('1', 'a[i]')):
        """Generate instructions from operators/functions and test each one.

        :arg op_list: operators to test.
        :arg unary_operators: the members of *op_list* that are applied as
            prefix operators to ``a[i]`` alone.
        :arg func_list: function names to test.
        :arg unary_funcs: the members of *func_list* called with ``a[i]``
            as their only argument.
        :arg rvals: right-hand operands / second arguments to try.
        """
        for op in op_list:
            if op in unary_operators:
                template = 'a[i] = %(op)s a[i]'
            else:
                template = 'a[i] = a[i] %(op)s %(rval)s'

            for rval in rvals:
                create_and_test(template % dict(op=op, rval=rval))

        for func in func_list:
            if func in unary_funcs:
                template = 'a[i] = %(func)s(a[i])'
            else:
                template = 'a[i] = %(func)s(a[i], %(rval)s)'

            for rval in rvals:
                create_and_test(template % dict(func=func, rval=rval))

    # 1) comparisons
    run(['>', '>=', '<', '<=', '==', '!='])

    # 2) logical operators
    run(['and', 'or', 'not'], ['not'])

    # 3) bitwise operators
    # bitwise xor '^' is not implemented in codegen
    run(['~', '|', '&'], ['~'])

    # 4) functions -- a random selection of the enabled math functions in opencl
    run(func_list=['acos', 'exp10', 'atan2', 'round'],
        unary_funcs=['round', 'acos', 'exp10'])

    # 5) remainders and floor division (use 4 instead of 1 to avoid pymbolic
    #    optimizing out the a[i] % 1)
    run(['%', '//'], rvals=['a[i]', '4'])
def test_check_for_variable_access_ordering():
    """Scheduling writes to ``a[i]`` and ``a[i+1]`` must be rejected.

    The kernel is preprocessed and then scheduled; scheduling is expected
    to raise ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    domain = "{[i]: 0<=i<n}"
    instructions = (
        "\n"
        "a[i] = 12\n"
        "a[i+1] = 13\n")

    knl = lp.preprocess_kernel(lp.make_kernel(domain, instructions))

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(knl)
def test_check_for_variable_access_ordering_with_aliasing():
    """Same ordering check, but for two temporaries sharing base storage.

    ``a`` and ``b`` are distinct temporaries that both use the base storage
    ``tmp``; scheduling the writes to ``a[i]`` and ``b[i+1]`` is expected
    to raise ``VariableAccessNotOrdered``.
    """
    from loopy.diagnostic import VariableAccessNotOrdered

    aliased_temporaries = [
        lp.TemporaryVariable("a", shape="n+1", base_storage="tmp"),
        lp.TemporaryVariable("b", shape="n+1", base_storage="tmp"),
        ]
    instructions = (
        "\n"
        "a[i] = 12\n"
        "b[i+1] = 13\n")

    knl = lp.make_kernel("{[i]: 0<=i<n}", instructions, aliased_temporaries)
    knl = lp.preprocess_kernel(knl)

    with pytest.raises(VariableAccessNotOrdered):
        lp.get_one_scheduled_kernel(knl)
@pytest.mark.parametrize(("second_index", "expect_barrier"),
        [
            ("2*i", True),
            ("2*i+1", False),
            ])
def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
    """Check barrier insertion between two dependent writes to a local array.

    The first instruction writes ``a[2*i]``; the second writes
    ``a[second_index]``.  A barrier between them is expected for ``2*i``
    and not for ``2*i+1``.
    """
    instructions = (
        "\n"
        "a[2*i] = 12 {id=first}\n"
        "a[%s] = 13 {id=second,dep=first}\n") % second_index

    local_array = lp.TemporaryVariable(
        "a", lp.auto, shape=(256,), scope=lp.AddressSpace.LOCAL)

    knl = lp.make_kernel("{[i]: 0<=i<128}", instructions, [local_array])
    knl = lp.tag_inames(knl, "i:l.0")
    knl = lp.preprocess_kernel(knl)
    knl = lp.get_one_scheduled_kernel(knl)

    found_barrier = barrier_between(knl, "first", "second")
    assert found_barrier == expect_barrier
def test_half_complex_conditional(ctx_factory):
    """Run a kernel whose ``if()`` expression mixes ``0`` and ``0j`` branches.

    The test only requires that building and executing the kernel succeeds.
    """
    queue = cl.CommandQueue(ctx_factory())

    domain = "{[i]: 0 <= i < 10}"
    instructions = "\ntmp[i] = if(i < 5, 0, 0j)\n"
    knl = lp.make_kernel(domain, instructions)

    knl(queue)
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
def test_dep_cycle_printing_and_error():
    """Code generation for a kernel with dependency cycles must fail cleanly.

    The kernel is vectorized over ``j`` and preprocessed; generating code
    for it is expected to raise ``DependencyCycleFound``.
    """
    # https://gitlab.tiker.net/inducer/loopy/issues/140
    # This kernel has two dep cycles.
    from loopy.diagnostic import DependencyCycleFound

    knl = lp.make_kernel(
        '{[i,j,k]: 0 <= i,j,k < 12}',
        "\n"
        "for j\n"
        "for i\n"
        "<> nu = i - 4\n"
        "if nu > 0\n"
        "<> P_val = a[i, j] {id=pset0}\n"
        "else\n"
        "P_val = 0.1 * a[i, j] {id=pset1}\n"
        "end\n"
        "<> B_sum = 0\n"
        "for k\n"
        "B_sum = B_sum + k * P_val {id=bset, dep=pset*}\n"
        "end\n"
        "# here, we are testing that Kc is properly promoted to a vector dtype\n"
        "<> Kc = P_val * B_sum {id=kset, dep=bset}\n"
        "a[i, j] = Kc {dep=kset}\n"
        "end\n"
        "end\n",
        [lp.GlobalArg('a', shape=(12, 12), dtype=np.int32)])

    knl = lp.split_iname(knl, 'j', 4, inner_tag='vec')
    knl = lp.split_array_axis(knl, 'a', 1, 4)
    knl = lp.tag_array_axes(knl, 'a', 'N1,N0,vec')
    knl = lp.preprocess_kernel(knl)

    with pytest.raises(DependencyCycleFound):
        # use the same code-generation entry point as the other tests in
        # this file instead of the legacy ``lp.generate_code(knl)[0]``
        print(lp.generate_code_v2(knl).device_code())
if __name__ == "__main__":
    # Script entry point: without a command-line argument the whole file is
    # handed to ``main``; otherwise the first argument is executed as
    # Python source.
    if len(sys.argv) <= 1:
        main([__file__])
    else:
        # NOTE(review): this exec()s arbitrary text taken from argv --
        # presumably a developer convenience for running one test; confirm.
        exec(sys.argv[1])