Skip to content
Snippets Groups Projects
Commit b939f7fa authored by Andreas Klöckner
Browse files

Assorted fixery to make stencil test pass.

parent 2ce04875
No related branches found
No related tags found
No related merge requests found
......@@ -78,7 +78,7 @@ Future ideas
- Check for unordered (no-dependency) writes to the same location
- String instructions?
- Vanilla C string instructions?
- Barriers for data exchanged via global vars?
......
......@@ -353,6 +353,8 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
arg = kernel.arg_dict[var_name]
# {{{ make parameter names and unification template
parameters = []
for i in range(arg.dimensions):
based_on = "%s_dim_%d" % (c_name, i)
......@@ -371,8 +373,12 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
elif len(parameters) == 1:
uni_template = uni_template[var(parameters[0])]
# }}}
kernel = extract_subst(kernel, rule_name, uni_template, parameters)
# {{{ track applied iname rewrites on footprint_subscripts
if footprint_subscripts is not None:
if not isinstance(footprint_subscripts, (list, tuple)):
footprint_subscripts = [footprint_subscripts]
......@@ -403,6 +409,8 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
else:
subst_use = rule_name
# }}}
new_kernel = precompute(kernel, subst_use, arg.dtype, sweep_inames,
new_storage_axis_names=dim_arg_names,
default_tag=default_tag)
......
......@@ -1690,14 +1690,14 @@ class SetOperationCacheManager:
lower_bound_pw_aff = self.dim_min(set, iname_to_dim[iname][1])
upper_bound_pw_aff = self.dim_max(set, iname_to_dim[iname][1])
from loopy.isl_helpers import static_max_of_pw_aff, static_min_of_pw_aff
from loopy.isl_helpers import static_max_of_pw_aff, static_value_of_pw_aff
from loopy.symbolic import pw_aff_to_expr
size = pw_aff_to_expr(static_max_of_pw_aff(
upper_bound_pw_aff - lower_bound_pw_aff + 1, constants_only=True,
context=context))
base_index = pw_aff_to_expr(
static_min_of_pw_aff(lower_bound_pw_aff, constants_only=False,
static_value_of_pw_aff(lower_bound_pw_aff, constants_only=False,
context=context))
return base_index, size
......
......@@ -33,7 +33,7 @@ class ScalarReductionOperation(ReductionOperation):
result = type(self).__name__.replace("ReductionOperation", "").lower()
if self.forced_result_dtype is not None:
result = "%s<%s>" % (result, str(self.dtype))
result = "%s<%s>" % (result, str(self.forced_result_dtype))
return result
......
......@@ -221,7 +221,7 @@ def test_plain_matrix_mul(ctx_factory):
kernel_gen = lp.check_kernels(kernel_gen, {})
lp.auto_test_vs_ref(ref_knl, ctx, kernel_gen,
op_count=vec_size*2*n**3/1e9, op_label="GFlops/s",
op_count=[vec_size*2*n**3/1e9], op_label=["GFlops"],
parameters={"n": n}, check_result=check)
......@@ -354,7 +354,7 @@ def test_rank_one(ctx_factory):
kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))
lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
op_count=np.dtype(dtype).itemsize*n**2/1e9, op_label="GBytes",
op_count=[np.dtype(dtype).itemsize*n**2/1e9], op_label=["GBytes"],
parameters={"n": n})
......
......@@ -89,39 +89,41 @@ def test_multi_cse(ctx_factory):
def test_stencil(ctx_factory):
ctx = ctx_factory()
# n=32 causes corner case behavior in size calculations for temporary (a
# non-unifiable, two-constant-segments PwAff as the base index)
n = 256
knl = lp.make_kernel(ctx.devices[0],
"{[i,j]: 0<= i,j < 32}",
"{[i,j]: 0<= i,j < %d}" % n,
[
"[i] z[i,j] = -2*a[i,j]"
" + a[i,j-1]"
" + a[i,j+1]"
" + a[i-1,j]"
" + a[i+1,j]"
"a_offset(ii, jj) := a[ii+1, jj+1]",
"z[i,j] = -2*a_offset(i,j)"
" + a_offset(i,j-1)"
" + a_offset(i,j+1)"
" + a_offset(i-1,j)"
" + a_offset(i+1,j)"
],
[
lp.GlobalArg("a", np.float32, shape=(32,32,)),
lp.GlobalArg("z", np.float32, shape=(32,32,))
lp.GlobalArg("a", np.float32, shape=(n+2,n+2,)),
lp.GlobalArg("z", np.float32, shape=(n+2,n+2,))
])
ref_knl = knl
def variant_1(knl):
knl = lp.add_prefetch(knl, "a", [0, 1])
return knl
def variant_2(knl):
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.1", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 16, outer_tag="g.0", inner_tag="l.0")
knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"])
return knl
#for variant in [variant_1, variant_2]:
for variant in [variant_2]:
for variant in [variant_1]:
kernel_gen = lp.generate_loop_schedules(variant(knl),
loop_priority=["i_outer", "i_inner_0", "j_0"])
kernel_gen = lp.check_kernels(kernel_gen)
for knl in kernel_gen:
print lp.generate_code(knl)
lp.auto_test_vs_ref(ref_knl, ctx, kernel_gen,
fills_entire_output=False, print_ref_code=True,
op_count=[n*n], op_label=["cells"])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment