Skip to content
Snippets Groups Projects
Commit 17ddb114 authored by Andreas Klöckner
Browse files

Allow unroll of sequential loops.

'SequentialTag' was the wrong idea: it impeded sequential unroll.
Restrict-to-sequential and tagging have nothing to do with each other.
This is now realized in the code.
parent 94eec370
No related branches found
No related tags found
No related merge requests found
......@@ -86,6 +86,10 @@ Future ideas
Dealt with
^^^^^^^^^^
- Restrict-to-sequential and tagging have nothing to do with each other.
-> Removed SequentialTag and turned it into a separate computed kernel
property.
- Just touching a variable written to by a non-idempotent
instruction makes that instruction also not idempotent
-> Idempotent renamed to boostable.
......
......@@ -127,15 +127,25 @@ def tag_dimensions(kernel, iname_to_tag):
iname_to_tag = dict((iname, parse_tag(tag))
for iname, tag in iname_to_tag.iteritems())
from loopy.kernel import ParallelTag
new_iname_to_tag = kernel.iname_to_tag.copy()
for iname, new_tag in iname_to_tag.iteritems():
old_tag = kernel.iname_to_tag.get(iname)
if old_tag is not None and new_tag is None:
raise ValueError("cannot untag iname '%s'" % iname)
if new_tag is None:
continue
if iname not in kernel.all_inames():
raise ValueError("cannot tag '%s'--not known" % iname)
old_tag = kernel.iname_to_tag.get(iname)
if isinstance(new_tag, ParallelTag) and iname in kernel.sequential_inames:
raise ValueError("cannot tag '%s' as parallel--"
"iname requires sequential execution" % iname)
if old_tag is not None and (old_tag != new_tag):
raise RuntimeError("'%s' is already tagged '%s'--cannot retag"
% (iname, old_tag))
......
......@@ -41,10 +41,10 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state):
generate_unroll_loop,
generate_sequential_loop_dim_code)
from loopy.kernel import UnrollTag, SequentialTag
from loopy.kernel import UnrollTag
if isinstance(tag, UnrollTag):
func = generate_unroll_loop
elif tag is None or isinstance(tag, SequentialTag):
elif tag is None:
func = generate_sequential_loop_dim_code
else:
raise RuntimeError("encountered (invalid) EnterLoop for '%s', tagged '%s'"
......
......@@ -23,10 +23,6 @@ class IndexTag(Record):
class SequentialTag(IndexTag):
def __str__(self):
return "seq"
class ParallelTag(IndexTag):
pass
......@@ -254,17 +250,6 @@ class Instruction(Record):
return index_vars | set(self.forced_iname_deps)
@memoize_method
def sequential_inames(self, iname_to_tag):
result = set()
for iname in self.all_inames():
tag = iname_to_tag.get(iname)
if isinstance(tag, SequentialTag):
result.add(iname)
return result
def __str__(self):
result = "%s: %s <- %s\n [%s]" % (self.id,
self.assignee, self.expression, ", ".join(sorted(self.all_inames())))
......@@ -444,7 +429,8 @@ class LoopKernel(Record):
temporary_variables={},
workgroup_size=None,
iname_to_dim=None,
iname_to_tag={}):
iname_to_tag={},
):
"""
:arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl.
Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}"
......@@ -515,7 +501,7 @@ class LoopKernel(Record):
dup_groups = dup_entry_match.groupdict()
dup_iname = dup_groups["iname"]
assert dup_iname
dup_tag = AutoFitLocalIndexTag()
dup_tag = None
if dup_groups["tag"] is not None:
dup_tag = parse_tag(dup_groups["tag"])
......@@ -548,32 +534,6 @@ class LoopKernel(Record):
if len(set(insn.id for insn in insns)) != len(insns):
raise RuntimeError("instruction ids do not appear to be unique")
# {{{ find and properly tag reduction inames
reduction_inames = set()
from loopy.symbolic import ReductionCallbackMapper
def map_reduction(expr, rec):
rec(expr.expr)
reduction_inames.update(expr.inames)
for insn in insns:
ReductionCallbackMapper(map_reduction)(insn.expression)
iname_to_tag = iname_to_tag.copy()
if reduction_inames:
for iname in reduction_inames:
tag = iname_to_tag.get(iname)
if not (tag is None or isinstance(tag, SequentialTag)):
raise RuntimeError("inconsistency detected: "
"sequential/reduction iname '%s' was "
"tagged otherwise" % iname)
iname_to_tag[iname] = SequentialTag()
# }}}
if assumptions is None:
assumptions_space = domain.get_space().params()
......@@ -610,6 +570,28 @@ class LoopKernel(Record):
if id_str not in used_ids:
return id_str
@property
@memoize_method
def sequential_inames(self):
    """Return the set of inames used by reductions in this kernel.

    Every instruction's expression is walked with a
    :class:`loopy.symbolic.ReductionCallbackMapper`; the inames of each
    reduction encountered are collected. The result is memoized via
    ``memoize_method``.

    :raises RuntimeError: if one of the collected inames is tagged with
        a :class:`ParallelTag` in ``self.iname_to_tag``.
    """
    from loopy.symbolic import ReductionCallbackMapper

    reduction_inames = set()

    def collect_reduction_inames(red_expr, rec):
        # Recurse into the reduction's operand, then record the inames
        # of this reduction itself.
        rec(red_expr.expr)
        reduction_inames.update(red_expr.inames)

    for insn in self.instructions:
        ReductionCallbackMapper(collect_reduction_inames)(insn.expression)

    for iname in reduction_inames:
        # An untagged iname yields None here, which is never a ParallelTag.
        if isinstance(self.iname_to_tag.get(iname), ParallelTag):
            raise RuntimeError("inconsistency detected: "
                    "sequential/reduction iname '%s' has "
                    "a parallel tag" % iname)

    return reduction_inames
@property
@memoize_method
def iname_to_dim(self):
......@@ -850,9 +832,12 @@ def count_reduction_iname_uses(insn):
return reduction_iname_uses
# }}}
# {{{ pass 2 of kernel creation
def make_kernel(*args, **kwargs):
"""Second pass of kernel creation. Think about requests for iname duplication
and temporary variable declaration received as part of string instructions.
......@@ -902,6 +887,9 @@ def make_kernel(*args, **kwargs):
child = subst_map(child)
for old_iname, new_iname in zip(reduction_expr.inames, new_red_inames):
new_iname_to_tag[new_iname] = insn_dup_iname_to_tag[old_iname]
from loopy.symbolic import Reduction
return Reduction(
operation=reduction_expr.operation,
......@@ -914,6 +902,9 @@ def make_kernel(*args, **kwargs):
# {{{ iname duplication
if insn.duplicate_inames_and_tags:
insn_dup_iname_to_tag = dict(insn.duplicate_inames_and_tags)
# {{{ duplicate non-reduction inames
reduction_iname_uses = count_reduction_iname_uses(insn)
......@@ -921,8 +912,6 @@ def make_kernel(*args, **kwargs):
duplicate_inames = [iname
for iname, tag in insn.duplicate_inames_and_tags
if iname not in reduction_iname_uses]
new_iname_tags = [tag for iname, tag in insn.duplicate_inames_and_tags
if iname not in reduction_iname_uses]
new_inames = [
knl.make_unique_var_name(
......@@ -931,8 +920,11 @@ def make_kernel(*args, **kwargs):
newly_created_vars)
for iname in duplicate_inames]
for iname, tag in zip(new_inames, new_iname_tags):
new_iname_to_tag[iname] = tag
for old_iname, new_iname in zip(duplicate_inames, new_inames):
new_tag = insn_dup_iname_to_tag[old_iname]
if new_tag is None:
new_tag = AutoFitLocalIndexTag()
new_iname_to_tag[new_iname] = new_tag
newly_created_vars.update(new_inames)
......@@ -1025,6 +1017,8 @@ def make_kernel(*args, **kwargs):
temporary_variables=new_temp_vars,
iname_to_tag=new_iname_to_tag)
# }}}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment