Skip to content
Snippets Groups Projects
Commit 96eca21e authored by Andreas Klöckner
Browse files

Variety of (mostly CSE-related) bug fixes.

parent 724bf1c3
No related branches found
No related tags found
No related merge requests found
......@@ -42,13 +42,10 @@ Things to consider
To-do
^^^^^
- variable shuffle detection
-> will need unification
- Automatically generate testing code vs. sequential.
- For forced workgroup sizes: check that at least one iname
maps to it.
maps to them.
- If isl can prove that all operands are positive, may use '/' instead of
'floor_div'.
......@@ -95,6 +92,9 @@ Future ideas
Dealt with
^^^^^^^^^^
- variable shuffle detection
-> will need unification
- Dimension joining
- user interface for dim length prescription
......
......@@ -235,9 +235,9 @@ def make_kernel(*args, **kwargs):
def split_dimension(kernel, iname, inner_length,
outer_iname=None, inner_iname=None,
outer_tag=None, inner_tag=None,
slabs=(0, 0)):
slabs=(0, 0), do_tagged_check=True):
if kernel.iname_to_tag.get(iname) is not None:
if do_tagged_check and kernel.iname_to_tag.get(iname) is not None:
raise RuntimeError("cannot split already tagged iname '%s'" % iname)
if iname not in kernel.all_inames():
......
......@@ -295,7 +295,7 @@ def process_cses(kernel, lead_csed, cse_descriptors):
def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local,
new_inames, ind_iname_to_tag):
independent_inames, new_inames, ind_iname_to_tag):
insn = lead_csed.insn
# {{{ decide whether to force a dep
......@@ -310,13 +310,13 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local,
assert dependencies <= parent_inames
for iname in parent_inames:
if iname in lead_csed.independent_inames:
if iname in independent_inames:
tag = ind_iname_to_tag[iname]
else:
tag = kernel.iname_to_tag.get(iname)
if should_cse_force_iname_dep(
iname, lead_csed.independent_inames, tag, dependencies,
iname, independent_inames, tag, dependencies,
target_var_is_local, lead_csed.cse):
forced_iname_deps.add(iname)
......@@ -324,7 +324,7 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local,
assignee = var(target_var_name)
if lead_csed.independent_inames:
if new_inames:
assignee = assignee[tuple(
var(iname) for iname in new_inames
)]
......@@ -334,8 +334,7 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local,
subst_map = SubstitutionMapper(make_subst_func(
dict(
(old_iname, var(new_iname))
for old_iname, new_iname in zip(lead_csed.independent_inames,
new_inames))))
for old_iname, new_iname in zip(independent_inames, new_inames))))
new_inner_expr = subst_map(lead_csed.cse.child)
insn_prefix = lead_csed.cse.prefix
......@@ -483,7 +482,7 @@ def realize_cse(kernel, cse_tag, dtype, independent_inames=[],
compute_insn = make_compute_insn(
kernel, lead_csed, target_var_name, target_var_is_local,
new_inames, ind_iname_to_tag)
independent_inames, new_inames, ind_iname_to_tag)
# {{{ substitute variable references into instructions
......@@ -493,12 +492,16 @@ def realize_cse(kernel, cse_tag, dtype, independent_inames=[],
lead_indices = [var(iname) for iname in independent_inames]
else:
found = False
for csed in cse_descriptors:
if cse is csed.cse:
found = True
break
if cse is not csed.cse:
return rec(cse.child)
if not found:
from pymbolic.primitives import CommonSubexpression
return CommonSubexpression(
rec(cse.child), cse.prefix)
lead_indices = csed.lead_index_exprs
......
......@@ -359,7 +359,8 @@ def assign_automatic_axes(kernel, only_axis_0=True):
from loopy import split_dimension
return assign_automatic_axes(
split_dimension(kernel, iname, inner_length=local_size[axis],
outer_tag=UnrollTag(), inner_tag=new_tag),
outer_tag=UnrollTag(), inner_tag=new_tag,
do_tagged_check=False),
only_axis_0=only_axis_0)
new_iname_to_tag = kernel.iname_to_tag.copy()
......
......@@ -175,7 +175,6 @@ def test_transpose(ctx_factory):
knl = lp.split_dimension(knl, "j", 16,
outer_tag="g.1", inner_tag="l.0")
knl = lp.add_prefetch(knl, 'a', ["i_inner", "j_inner"])
knl = lp.add_prefetch(knl, 'b', ["j_inner", "k_inner", ])
kernel_gen = lp.generate_loop_schedules(knl)
kernel_gen = lp.check_kernels(kernel_gen, {})
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment