diff --git a/MEMO b/MEMO index 148999e6ae10f8030b1585287d67648161473f70..dd00893d8c7085ccd78cdd6adb103eed194f0059 100644 --- a/MEMO +++ b/MEMO @@ -42,9 +42,6 @@ Things to consider To-do ^^^^^ -- Just touching a variable written to by a non-idempotent - instruction makes that instruction also not idempotent - - user interface for dim length prescription - Deal with equality constraints. @@ -89,6 +86,11 @@ Future ideas Dealt with ^^^^^^^^^^ +- Just touching a variable written to by a non-idempotent + instruction makes that instruction also not idempotent + -> Idempotent renamed to boostable. + -> Done. + - Give the user control over which reduction inames are duplicated. diff --git a/loopy/kernel.py b/loopy/kernel.py index 9af083c2e3dadfafaa174e492ff31ad145468b39..2eff255384969f11d7b37818e7f383da36c3c6c1 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -223,8 +223,9 @@ class Instruction(Record): :ivar insn_deps: a list of ids of :class:`Instruction` instances that *must* be executed before this one. Note that loop scheduling augments this by adding dependencies on any writes to temporaries read by this instruction. - :ivar idempotent: Whether the instruction may be executed repeatedly (while obeying - dependencies) without changing the meaning of the program. + :ivar boostable: Whether the instruction may safely be executed + inside more loops than advertised without changing the meaning + of the program. Allowed values are *None* (for unknown), *True*, and *False*. 
The following two instance variables are only used until :func:`loopy.kernel.make_kernel` is finished: @@ -236,13 +237,13 @@ class Instruction(Record): """ def __init__(self, id, assignee, expression, - forced_iname_deps=[], insn_deps=[], idempotent=None, + forced_iname_deps=[], insn_deps=[], boostable=None, temp_var_type=None, duplicate_inames_and_tags=[]): Record.__init__(self, id=id, assignee=assignee, expression=expression, forced_iname_deps=forced_iname_deps, - insn_deps=insn_deps, idempotent=idempotent, + insn_deps=insn_deps, boostable=boostable, temp_var_type=temp_var_type, duplicate_inames_and_tags=duplicate_inames_and_tags) @memoize_method @@ -268,14 +269,14 @@ class Instruction(Record): result = "%s: %s <- %s\n [%s]" % (self.id, self.assignee, self.expression, ", ".join(sorted(self.all_inames()))) - if self.idempotent == True: - result += " (idempotent)" - elif self.idempotent == False: - result += " (not idempotent)" - elif self.idempotent is None: - result += " (idempotence unknown)" + if self.boostable == True: + result += " (boostable)" + elif self.boostable == False: + result += " (not boostable)" + elif self.boostable is None: + result += " (boostability unknown)" else: - raise RuntimeError("unexpected value for Instruction.idempotent") + raise RuntimeError("unexpected value for Instruction.boostable") if self.insn_deps: result += "\n : " + ", ".join(self.insn_deps) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 6f02fd2af542d379522ac12f91f5f967ebb19201..a06e28b4eff24e6451281e3b70c3a86f7697e59a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -98,7 +98,7 @@ def realize_reduction(kernel): # }}} -# {{{ automatic dependencies, find idempotent instructions +# {{{ automatic dependencies, find boostability of instructions def find_accessors(kernel, readers): """ @@ -129,7 +129,7 @@ def find_accessors(kernel, readers): -def add_idempotence_and_automatic_dependencies(kernel): +def 
add_boostability_and_automatic_dependencies(kernel): writer_map = find_accessors(kernel, readers=False) arg_names = set(arg.name for arg in kernel.args) @@ -145,11 +145,14 @@ def add_idempotence_and_automatic_dependencies(kernel): set(var.name for var in dm(insn.expression)) & var_names) + non_boostable_vars = set() + new_insns = [] for insn in kernel.instructions: auto_deps = [] # {{{ add automatic dependencies + all_my_var_writers = set() for var in dep_map[insn.id]: var_writers = writer_map.get(var, set()) @@ -171,7 +174,7 @@ def add_idempotence_and_automatic_dependencies(kernel): # }}} - # {{{ find dependency loops, flag idempotence + # {{{ find dependency loops, flag boostability while True: last_all_my_var_writers = all_my_var_writers @@ -185,12 +188,30 @@ def add_idempotence_and_automatic_dependencies(kernel): # }}} + boostable = insn.id not in all_my_var_writers + + if not boostable: + non_boostable_vars.add(insn.get_assignee_var_name()) + new_insns.append( insn.copy( insn_deps=insn.insn_deps + auto_deps, - idempotent=insn.id not in all_my_var_writers)) + boostable=boostable)) + + # {{{ remove boostability from insns that access non-boostable vars + + new2_insns = [] + for insn in new_insns: + accessed_vars = ( + set([insn.get_assignee_var_name()]) + | insn.get_read_var_names()) + + boostable = insn.boostable and not bool(non_boostable_vars & accessed_vars) + new2_insns.append(insn.copy(boostable=boostable)) + + # }}} - return kernel.copy(instructions=new_insns) + return kernel.copy(instructions=new2_insns) # }}} @@ -480,7 +501,7 @@ def preprocess_kernel(kernel): # }}} kernel = assign_automatic_axes(kernel) - kernel = add_idempotence_and_automatic_dependencies(kernel) + kernel = add_boostability_and_automatic_dependencies(kernel) kernel = adjust_local_temp_var_storage(kernel) import loopy.check as chk diff --git a/loopy/schedule.py b/loopy/schedule.py index d9e52dfa8aedacf003f3b3190dc57ff5dacb2297..d6129567666f0b5464e0db9e1272dfecd9b22c98 100644 --- 
a/loopy/schedule.py +++ b/loopy/schedule.py @@ -191,8 +191,8 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[]): schedule_now = set(insn.insn_deps) <= scheduled_insn_ids - if insn.idempotent == True: - # If insn is idempotent, it may be placed inside a more deeply + if insn.boostable == True: + # If insn is boostable, it may be placed inside a more deeply # nested loop without harm. # But if it can be scheduled on the way *out* of the currently @@ -212,8 +212,8 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[]): if schedulable_at_loop_levels != [len(active_inames)]: schedule_now = False - elif insn.idempotent == False: - # If insn is not idempotent, we must insist that it is placed inside + elif insn.boostable == False: + # If insn is not boostable, we must insist that it is placed inside # the exactly correct set of loops. schedule_now = schedule_now and ( @@ -222,7 +222,7 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[]): active_inames_set - parallel_inames) else: - raise RuntimeError("instruction '%s' has undetermined idempotence" + raise RuntimeError("instruction '%s' has undetermined boostability" % insn.id) if schedule_now: