Newer
Older
lp.GlobalArg("c", shape=lp.auto),
Andreas Klöckner
committed
lp.ValueArg("n"),
],
assumptions="n>=1")
print knl
print lp.CompiledKernel(ctx, knl).get_highlighted_code()
def test_arg_guessing(ctx_factory):
    """Build a kernel without an explicit argument list and print its code.

    No kernel arguments are passed to ``lp.make_kernel``; ``a``, ``b``, ``c``
    and ``n`` appear only in the instructions and the domain.  The test
    passes if kernel creation and code generation complete without raising.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = ["{[i,j]: 0<=i,j<n }"]
    instructions = (
        "\n"
        "a = 1.5 + sum((i,j), i*j)\n"
        "b[i, j] = i*j\n"
        "c[i+j, j] = b[j,i]\n"
    )

    knl = lp.make_kernel(device, domains, instructions, assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
def test_arg_guessing_with_reduction(ctx_factory):
    """Like the plain argument-guessing test, plus a reduction over ``b``.

    No kernel arguments are declared.  In addition to the scalar reduction
    over ``i*j``, the instruction ``d`` sums over ``b[i,j]``, an array that
    is itself only introduced by the instructions.  The test passes if
    kernel creation and code generation complete without raising.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    #logging.basicConfig(level=logging.DEBUG)

    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = ["{[i,j]: 0<=i,j<n }"]
    instructions = (
        "\n"
        "a = 1.5 + sum((i,j), i*j)\n"
        "d = 1.5 + sum((i,j), b[i,j])\n"
        "b[i, j] = i*j\n"
        "c[i+j, j] = b[j,i]\n"
    )

    knl = lp.make_kernel(device, domains, instructions, assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
# }}}
def test_nonlinear_index(ctx_factory):
    """Generate code for a store whose subscript is the product ``i*i``.

    The kernel declares ``a`` with shape ``"n"`` and the value argument
    ``n`` explicitly.  The test passes if kernel creation and code
    generation complete without raising.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = ["{[i,j]: 0<=i,j<n }"]
    instructions = (
        "\n"
        "a[i*i] = 17\n"
    )
    kernel_args = [
        lp.GlobalArg("a", shape="n"),
        lp.ValueArg("n"),
    ]

    knl = lp.make_kernel(
        device, domains, instructions, kernel_args,
        assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
def test_triangle_domain(ctx_factory):
    """Generate code for a loop domain constrained by ``i <= j``.

    The single instruction stores 17 into ``a[i,j]``.  The test passes if
    kernel creation and code generation complete without raising.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = ["{[i,j]: 0<=i,j<n and i <= j}"]

    knl = lp.make_kernel(device, domains, "a[i,j] = 17", assumptions="n>=1")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
def test_offsets_and_slicing(ctx_factory):
    """Run ``b[i,j] = 2*a[i,j]`` on sliced views of larger device arrays.

    ``a`` and ``b`` are sub-blocks of two ``n x n`` random arrays, taken at
    different offsets.  The same update is applied to host copies with
    numpy, and the full device array ``b_full`` is then compared against
    that host reference.

    :arg ctx_factory: callable returning the context handed to loopy and
        used to create the command queue.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Python-side edge length of the backing arrays.  It has to cover the
    # largest slice bound used below (10+4).  This is not the kernel
    # parameter "n": only a and b are passed when the kernel is invoked.
    n = 20

    knl = lp.make_kernel(
        ctx.devices[0],
        ["{[i,j]: 0<=i<n and 0<=j<m }"],
        "\n"
        "b[i,j] = 2*a[i,j]\n",
        assumptions="n>=1 and m>=1",
        default_offset=lp.auto)

    knl = lp.tag_data_axes(knl, "a,b", "stride:auto,stride:1")

    cknl = lp.CompiledKernel(ctx, knl)

    a_full = cl.clrandom.rand(queue, (n, n), np.float64)
    a_full_h = a_full.get()
    b_full = cl.clrandom.rand(queue, (n, n), np.float64)
    b_full_h = b_full.get()

    a_sub = (slice(3, 10), slice(5, 10))
    a = a_full[a_sub]

    # Same extent as a_sub, shifted by (3, 4).
    b_sub = (slice(3+3, 10+3), slice(5+4, 10+4))
    b = b_full[b_sub]

    # Host-side reference result.
    b_full_h[b_sub] = 2*a_full_h[a_sub]

    print(cknl.get_highlighted_code({"a": a.dtype}))

    # Actually execute the kernel on the sliced views; without this the
    # comparison below would only see the untouched random data in b_full.
    cknl(queue, a=a, b=b)

    import numpy.linalg as la
    assert la.norm(b_full.get() - b_full_h) < 1e-13
Andreas Klöckner
committed
def test_vector_ilp_with_prefetch(ctx_factory):
    """Check that an ILP-split kernel with a prefetch of ``a`` has one barrier.

    ``i`` is split by 128 (inner tag ``l.0``), then ``i_outer`` is split by 4
    (outer tag ``g.0``, inner tag ``ilp``), and ``a`` is prefetched over
    ``i_inner`` and ``i_outer_inner``.  The generated code must contain the
    text ``barrier`` exactly once.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(ctx.devices[0],
            "{ [i]: 0<=i<n }",
            "out[i] = 2*a[i]",
            [
                # Tests that comma'd arguments interoperate with
                # argument guessing.
                lp.GlobalArg("out,a", np.float32, shape=lp.auto),
                "..."
                ])

    knl = lp.split_iname(knl, "i", 128, inner_tag="l.0")
    knl = lp.split_iname(knl, "i_outer", 4, outer_tag="g.0", inner_tag="ilp")
    knl = lp.add_prefetch(knl, "a", ["i_inner", "i_outer_inner"])

    cknl = lp.CompiledKernel(ctx, knl)
    cknl.cl_kernel_info()

    import re
    # Fetch the generated source so the barrier count can be checked.
    code = cknl.get_code()
    assert len(list(re.finditer("barrier", code))) == 1
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
def test_convolution_like(ctx_factory):
    """Compare a split variant of a 2D filter kernel against the unsplit one.

    The kernel sums ``img[im_x-f_x, im_y-f_y] * f[f_w+f_x, f_w+f_y]`` over
    ``f_x`` and ``f_y`` into ``out``.  The variant splits ``im_x`` by 16 with
    inner tag ``l.0`` and is checked with ``lp.auto_test_vs_ref`` for
    ``im_w = im_h = 1024`` and ``f_w = 7``.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    ctx = ctx_factory()

    dtype = np.float64

    domain = (
        "{ [im_x, im_y, f_x, f_y]: -f_w <= f_x,f_y <= f_w "
        "and f_w <= im_x < im_w-f_w and f_w <= im_y < im_h-f_w }"
    )
    instructions = (
        "\n"
        "out[im_x-f_w, im_y-f_w] = sum((f_x, f_y), "
        "img[im_x-f_x, im_y-f_y] * f[f_w+f_x, f_w+f_y])\n"
    )

    knl = lp.make_kernel(
        ctx.devices[0],
        domain,
        instructions,
        [
            lp.GlobalArg("f", dtype, shape=lp.auto),
            lp.GlobalArg("img", dtype, shape=lp.auto),
            lp.GlobalArg("out", dtype, shape=lp.auto),
            "...",
        ],
        assumptions="f_w>=1 and im_w, im_h >= 1")

    ref_knl = knl

    # The only transformed variant under test: im_x split into groups of 16.
    split_knl = lp.split_iname(knl, "im_x", 16, inner_tag="l.0")

    lp.auto_test_vs_ref(
        ref_knl, ctx, split_knl,
        parameters={"im_w": 1024, "im_h": 1024, "f_w": 7})
def test_c_instruction(ctx_factory):
    """Generate code for a kernel mixing a ``CInstruction`` with an assignment.

    The C snippet assigns ``sin((float) i)`` to the float32 temporary ``x``,
    which the second instruction stores to ``a[i*i]``.  ``i`` is split by 128
    with tags ``g.0``/``l.0``.  The test passes if kernel creation and code
    generation complete without raising.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    #logging.basicConfig(level=logging.DEBUG)

    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = ["{[i,j]: 0<=i,j<n }"]
    instructions = [
        lp.CInstruction(
            "i",
            "\n"
            "x = sin((float) i);\n",
            assignees="x"),
        "a[i*i] = x",
    ]
    kernel_data = [
        lp.GlobalArg("a", shape="n"),
        lp.ValueArg("n"),
        lp.TemporaryVariable("x", np.float32),
    ]

    knl = lp.make_kernel(
        device, domains, instructions, kernel_data,
        assumptions="n>=1")

    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
def test_dependent_domain_insn_iname_finding(ctx_factory):
    """Check that ``set_strength`` is found to depend on ``isrc_box``.

    The bounds of the second domain (``isrc_start``, ``isrc_end``) are
    temporaries computed from ``isrc_box`` in the instructions.  The test
    asserts that ``isrc_box`` is among the inames reported for the
    instruction with id ``set_strength``, then generates code with the
    argument dtypes supplied explicitly.

    :arg ctx_factory: callable returning the context handed to loopy.
    """
    ctx = ctx_factory()
    device = ctx.devices[0]

    domains = [
        "{[isrc_box]: 0<=isrc_box<nsrc_boxes}",
        "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}",
    ]
    instructions = (
        "\n"
        "<> src_ibox = source_boxes[isrc_box]\n"
        "<> isrc_start = box_source_starts[src_ibox]\n"
        "<> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]\n"
        "<> strength = strengths[isrc] {id=set_strength}\n"
    )
    kernel_data = [
        lp.GlobalArg(
            "box_source_starts,box_source_counts_nonchild",
            None, shape=None),
        "...",
    ]

    knl = lp.make_kernel(device, domains, instructions, kernel_data)
    print(knl)

    assert "isrc_box" in knl.insn_inames("set_strength")

    arg_dtypes = {
        "source_boxes": np.int32,
        "box_source_starts": np.int32,
        "box_source_counts_nonchild": np.int32,
        "strengths": np.float64,
    }
    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code(arg_dtypes))
if __name__ == "__main__":
    # Without command-line arguments, hand this file to py.test.  Otherwise
    # the first argument is executed as Python source at module scope.
    # NOTE(review): exec() runs whatever text is given on the command line;
    # this is only appropriate for local developer use.
    if not sys.argv[1:]:
        from py.test.cmdline import main
        main([__file__])
    else:
        exec(sys.argv[1])
# vim: foldmethod=marker