Newer
Older
Andreas Klöckner
committed
print lp.CompiledKernel(ctx, knl).get_highlighted_code()
def test_arg_guessing(ctx_factory):
    """Build a kernel without an explicit argument list and print it.

    No argument declarations are passed to ``lp.make_kernel``; going by the
    test name, loopy is expected to guess them from the instructions
    (assumption based on the name -- nothing here checks the guessed
    arguments). There are no assertions: the test passes as long as kernel
    creation and printing the highlighted code do not raise.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    domains = ["{[i,j]: 0<=i,j<n }"]
    # Kept flush-left so the literal carries no extra leading whitespace.
    instructions = """
a = 1.5 + sum((i,j), i*j)
b[i, j] = i*j
c[i+j, j] = b[j,i]
"""

    knl = lp.make_kernel(
        ctx.devices[0], domains, instructions, assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
def test_arg_guessing_with_reduction(ctx_factory):
    """Like the plain argument-guessing test, plus a reduction over ``b``.

    The instruction ``d = 1.5 + sum((i,j), b[i,j])`` reads ``b`` inside a
    reduction while ``b`` is also written by another instruction. No
    argument declarations are passed to ``lp.make_kernel``. There are no
    assertions: the test passes as long as kernel creation and printing
    the highlighted code do not raise.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    # Uncomment for verbose output while debugging:
    #logging.basicConfig(level=logging.DEBUG)
    ctx = ctx_factory()

    domains = ["{[i,j]: 0<=i,j<n }"]
    # Kept flush-left so the literal carries no extra leading whitespace.
    instructions = """
a = 1.5 + sum((i,j), i*j)
d = 1.5 + sum((i,j), b[i,j])
b[i, j] = i*j
c[i+j, j] = b[j,i]
"""

    knl = lp.make_kernel(
        ctx.devices[0], domains, instructions, assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
# }}}
def test_nonlinear_index(ctx_factory):
    """Build and print a kernel whose store uses the subscript ``i*i``.

    ``a`` is declared as a global array of shape ``n`` and ``n`` as a value
    argument. There are no assertions: the test passes as long as kernel
    creation and printing the highlighted code do not raise.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = ["{[i,j]: 0<=i,j<n }"]
    # Kept flush-left so the literal carries no extra leading whitespace.
    instructions = """
a[i*i] = 17
"""
    kernel_args = [
        lp.GlobalArg("a", shape="n"),
        lp.ValueArg("n"),
    ]

    knl = lp.make_kernel(
        device, domains, instructions, kernel_args, assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
def test_triangle_domain(ctx_factory):
    """Build and print a kernel over the triangular domain ``i <= j``.

    The loop domain is ``0<=i,j<n`` restricted by ``i <= j``; the single
    instruction stores 17 into ``a[i,j]``. There are no assertions: the
    test passes as long as kernel creation and printing the highlighted
    code do not raise.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    triangle = ["{[i,j]: 0<=i,j<n and i <= j}"]
    knl = lp.make_kernel(
        ctx.devices[0], triangle, "a[i,j] = 17", assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
def test_offsets_and_slicing(ctx_factory):
    """Run ``b[i,j] = 2*a[i,j]`` on sliced device arrays and verify it.

    Two ``n`` x ``n`` random device arrays are created and the kernel is
    run on differently placed slices of each. The expected result is
    computed on host copies using the same slices, and the *full* device
    array ``b_full`` is compared against it, so entries outside the ``b``
    slice are checked to be unchanged as well.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Side length of the square backing arrays. The slices below reach up
    # to index 13 (rows) and 14 (columns), so this must be at least 14.
    n = 20

    knl = lp.make_kernel(ctx.devices[0], [
            "{[i,j]: 0<=i<n and 0<=j<m }",
            ],
            """
b[i,j] = 2*a[i,j]
""",
            assumptions="n>=1 and m>=1",
            default_offset=lp.auto)

    knl = lp.tag_data_axes(knl, "a,b", "stride:auto,stride:1")

    cknl = lp.CompiledKernel(ctx, knl)

    a_full = cl.clrandom.rand(queue, (n, n), np.float64)
    a_full_h = a_full.get()
    b_full = cl.clrandom.rand(queue, (n, n), np.float64)
    b_full_h = b_full.get()

    # Both slices have the same 7 x 5 extent but start at different
    # positions inside their backing arrays.
    a_sub = (slice(3, 10), slice(5, 10))
    a = a_full[a_sub]

    b_sub = (slice(3+3, 10+3), slice(5+4, 10+4))
    b = b_full[b_sub]

    # Host-side reference result.
    b_full_h[b_sub] = 2*a_full_h[a_sub]

    print(cknl.get_highlighted_code({"a": a.dtype}))

    # Execute the kernel on the sliced arrays. Without this call b_full is
    # never written and the comparison below cannot succeed. The check
    # against b_full relies on ``b`` sharing its storage with ``b_full``.
    cknl(queue, a=a, b=b)

    import numpy.linalg as la
    assert la.norm(b_full.get() - b_full_h) < 1e-13
Andreas Klöckner
committed
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
def test_vector_ilp_with_prefetch(ctx_factory):
    """Check that the generated code contains exactly one ``barrier``.

    The kernel ``out[i] = 2*a[i]`` is split into a local axis of 128 and an
    ILP axis of 4 (with the remaining outer axis tagged ``g.0``), and ``a``
    is prefetched across ``i_inner`` and ``i_outer_inner``. The test then
    asserts that the string ``barrier`` occurs exactly once in the code
    returned by ``lp.generate_code``.

    :arg ctx_factory: callable returning the OpenCL context to build for.
    """
    ctx = ctx_factory()

    kernel_args = [
        # Tests that comma-d arguments interoperate with
        # argument guessing.
        lp.GlobalArg("out,a", np.float32, shape=lp.auto),
        "...",
    ]
    knl = lp.make_kernel(
        ctx.devices[0],
        "{ [i]: 0<=i<n }",
        "out[i] = 2*a[i]",
        kernel_args)

    knl = lp.split_iname(knl, "i", 128, inner_tag="l.0")
    knl = lp.split_iname(
        knl, "i_outer", 4, outer_tag="g.0", inner_tag="ilp")
    knl = lp.add_prefetch(knl, "a", ["i_inner", "i_outer_inner"])

    # generate_code returns a pair; only the code text is inspected.
    code, _info = lp.generate_code(knl)

    import re
    barrier_hits = re.findall("barrier", code)
    assert len(barrier_hits) == 1
if __name__ == "__main__":
    # Script entry point: with no command-line argument, run this file
    # through py.test; otherwise execute the first argument as Python code.
    import sys
    if len(sys.argv) <= 1:
        from py.test.cmdline import main
        main([__file__])
    else:
        # NOTE(review): this exec()s arbitrary text taken from the command
        # line. Looks like a developer convenience for running a single
        # test by hand; never feed it untrusted input.
        exec(sys.argv[1])
# vim: foldmethod=marker