From 871cc0a5b82c227d948dbe3da7be531dba3cb86c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Fri, 26 Jul 2013 13:14:00 -0400 Subject: [PATCH] Automatic dependencies (dumb) -> default dependencies (better) --- doc/reference.rst | 11 ++-- loopy/check.py | 2 +- loopy/kernel/creation.py | 5 +- loopy/kernel/data.py | 10 ++-- loopy/preprocess.py | 111 +++++++++++++++++++++++++-------------- test/test_sem_reagan.py | 1 + 6 files changed, 91 insertions(+), 49 deletions(-) diff --git a/doc/reference.rst b/doc/reference.rst index a1820e219..251d86eb4 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -183,9 +183,10 @@ These are usually key-value pairs. The following attributes are recognized: .. note:: - Loopy will automatically add depdencies of reading instructions - on writing instructions *if and only if* there is exactly one writing - instruction for the written variable (temporary or argument). + If this is not specified, :mod:`loopy` will automatically add + depdencies of reading instructions on writing instructions *if and + only if* there is exactly one writing instruction for the written + variable (temporary or argument). * ``priority=integer`` sets the instructions priority to the value ``integer``. Instructions with higher priority will be scheduled sooner, @@ -193,6 +194,10 @@ These are usually key-value pairs. The following attributes are recognized: instruction ahead of a higher-priority one if loop orders or dependencies require it. +* ``if=variable1:variable2`` Only execute this instruction if all condition + variables (which must be scalar variables) evaluate to ``true`` (as + defined by C). + .. autoclass:: ExpressionInstruction C Block Instructions diff --git a/loopy/check.py b/loopy/check.py index de89beac0..e39b28f99 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -44,7 +44,7 @@ def check_insn_attributes(kernel): % (insn.id, ", ".join( insn.forced_iname_deps - kernel.all_inames()))) - if not insn.insn_deps <= all_insn_ids: + if insn.insn_deps is not None and not insn.insn_deps <= all_insn_ids: raise LoopyError("insn '%s' has unknown instruction " "dependencies: %s" % (insn.id, ", ".join( diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a3a35b082..d0a71dae6 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -177,7 +177,7 @@ def parse_insn(insn): raise if insn_match is not None: - insn_deps = set() + insn_deps = None insn_id = None priority = 0 forced_iname_deps = frozenset() @@ -204,7 +204,8 @@ def parse_insn(insn): elif opt_key == "priority": priority = int(opt_value) elif opt_key == "dep": - insn_deps = set(opt_value.split(":")) + insn_deps = frozenset(dep.strip() for dep in opt_value.split(":") + if dep.strip()) elif opt_key == "inames": forced_iname_deps = frozenset(opt_value.split(":")) elif opt_key == "if": diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 72e774c5e..df9b7f3ad 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -349,6 +349,8 @@ class InstructionBase(Record): *must* be executed before this one. Note that loop scheduling augments this by adding dependencies on any writes to temporaries read by this instruction. + May be *None* to invoke the default. + .. attribute:: predicates a :class:`frozenset` of variable names whose truth values (as defined @@ -384,7 +386,7 @@ class InstructionBase(Record): boostable, boostable_into, predicates): assert isinstance(forced_iname_deps, frozenset) - assert isinstance(insn_deps, set) + assert isinstance(insn_deps, frozenset) or insn_deps is None Record.__init__(self, id=id, @@ -500,8 +502,8 @@ class ExpressionInstruction(InstructionBase): def __init__(self, assignee, expression, - id=None, forced_iname_deps=frozenset(), insn_deps=set(), boostable=None, - boostable_into=None, + id=None, forced_iname_deps=frozenset(), insn_deps=None, + boostable=None, boostable_into=None, temp_var_type=None, priority=0, predicates=frozenset()): InstructionBase.__init__(self, @@ -640,7 +642,7 @@ class CInstruction(InstructionBase): def __init__(self, iname_exprs, code, read_variables=frozenset(), assignees=frozenset(), - id=None, insn_deps=set(), forced_iname_deps=frozenset(), priority=0, + id=None, insn_deps=None, forced_iname_deps=frozenset(), priority=0, boostable=None, boostable_into=None, predicates=frozenset()): """ :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples, diff --git a/loopy/preprocess.py b/loopy/preprocess.py index b7b3a916d..a656ce6fe 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -288,6 +288,52 @@ def mark_local_temporaries(kernel): # }}} +# {{{ default dependencies + +def add_default_dependencies(kernel): + logger.debug("%s: default deps" % kernel.name) + + writer_map = kernel.writer_map() + + arg_names = set(arg.name for arg in kernel.args) + + var_names = arg_names | set(kernel.temporary_variables.iterkeys()) + + dep_map = dict( + (insn.id, insn.read_dependency_names() & var_names) + for insn in kernel.instructions) + + new_insns = [] + for insn in kernel.instructions: + if insn.insn_deps is None: + auto_deps = set() + + # {{{ add automatic dependencies + + all_my_var_writers = set() + for var in dep_map[insn.id]: + var_writers = writer_map.get(var, set()) + all_my_var_writers |= var_writers + + if not var_writers and var not in arg_names: + warn(kernel, "read_no_write(%s)" % var, + "temporary variable '%s' is read, but never written." + % var) + + if len(var_writers) == 1: + auto_deps.update(var_writers - set([insn.id])) + + # }}} + + insn = insn.copy(insn_deps=frozenset(auto_deps)) + + new_insns.append(insn) + + return kernel.copy(instructions=new_insns) + +# }}} + + # {{{ rewrite reduction to imperative form def realize_reduction(kernel, insn_id_filter=None): @@ -333,33 +379,34 @@ def realize_reduction(kernel, insn_id_filter=None): is_local=False) outer_insn_inames = temp_kernel.insn_inames(insn) - bad_inames = set(expr.inames) & outer_insn_inames + bad_inames = frozenset(expr.inames) & outer_insn_inames if bad_inames: raise LoopyError("reduction used within loop(s) that it was " "supposed to reduce over: " + ", ".join(bad_inames)) - new_id = temp_kernel.make_unique_instruction_id( + init_id = temp_kernel.make_unique_instruction_id( based_on="%s_%s_init" % (insn.id, "_".join(expr.inames)), extra_used_ids=set(i.id for i in generated_insns)) init_insn = ExpressionInstruction( - id=new_id, + id=init_id, assignee=target_var, - forced_iname_deps=outer_insn_inames - set(expr.inames), + forced_iname_deps=outer_insn_inames - frozenset(expr.inames), + insn_deps=frozenset(), expression=expr.operation.neutral_element(arg_dtype, expr.inames)) generated_insns.append(init_insn) - new_id = temp_kernel.make_unique_instruction_id( + update_id = temp_kernel.make_unique_instruction_id( based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)), extra_used_ids=set(i.id for i in generated_insns)) reduction_insn = ExpressionInstruction( - id=new_id, + id=update_id, assignee=target_var, expression=expr.operation( arg_dtype, target_var, expr.expr, expr.inames), - insn_deps=set([init_insn.id]) | insn.insn_deps, + insn_deps=frozenset([init_insn.id]) | insn.insn_deps, forced_iname_deps=temp_kernel.insn_inames(insn) | set(expr.inames)) generated_insns.append(reduction_insn) @@ -397,7 +444,7 @@ def realize_reduction(kernel, insn_id_filter=None): insn = insn.copy( expression=new_expression, insn_deps=insn.insn_deps - | new_insn_insn_deps, + | frozenset(new_insn_insn_deps), forced_iname_deps=temp_kernel.insn_inames(insn)) insn_queue = generated_insns + [insn] + insn_queue @@ -544,10 +591,10 @@ def duplicate_private_temporaries_for_ilp(kernel): # }}} -# {{{ automatic dependencies, find boostability of instructions +# {{{ find boostability of instructions -def add_boostability_and_automatic_dependencies(kernel): - logger.debug("%s: automatic deps, boostability" % kernel.name) +def find_boostability(kernel): + logger.debug("%s: boostability" % kernel.name) writer_map = kernel.writer_map() @@ -563,31 +610,11 @@ def add_boostability_and_automatic_dependencies(kernel): new_insns = [] for insn in kernel.instructions: - auto_deps = set() - - # {{{ add automatic dependencies - all_my_var_writers = set() for var in dep_map[insn.id]: var_writers = writer_map.get(var, set()) all_my_var_writers |= var_writers - if not var_writers and var not in arg_names: - warn(kernel, "read_no_write(%s)" % var, - "temporary variable '%s' is read, but never written." % var) - - if len(var_writers) > 1 and not var_writers & set(insn.insn_deps): - warn(kernel, "read_without_dep(%s,%s)" % (var, insn.id), - "'%s' is written from more than one place, " - "but instruction '%s' (which reads this variable) " - "does not specify a dependency on any of the writers." - % (var, insn.id)) - - if len(var_writers) == 1: - auto_deps.update(var_writers - set([insn.id])) - - # }}} - # {{{ find dependency loops, flag boostability while True: @@ -609,10 +636,9 @@ def add_boostability_and_automatic_dependencies(kernel): non_boostable_vars.update( var_name for var_name, _ in insn.assignees_and_indices()) - new_insns.append( - insn.copy( - insn_deps=insn.insn_deps | auto_deps, - boostable=boostable)) + insn = insn.copy(boostable=boostable) + + new_insns.append(insn) # {{{ remove boostability from isns that access non-boostable vars @@ -1029,9 +1055,16 @@ def preprocess_kernel(kernel): kernel = infer_unknown_types(kernel, expect_completion=False) - # Ordering restriction: - # realize_reduction must happen after type inference because it needs - # to be able to determine the types of the reduced expressions. + kernel = add_default_dependencies(kernel) + + # Ordering restrictions: + # + # - realize_reduction must happen after type inference because it needs + # to be able to determine the types of the reduced expressions. + # + # - realize_reduction must happen after default dependencies are added + # because it manipulates the insn_deps field, which could prevent + # defaults from being applied. kernel = realize_reduction(kernel) @@ -1042,7 +1075,7 @@ def preprocess_kernel(kernel): kernel = duplicate_private_temporaries_for_ilp(kernel) kernel = mark_local_temporaries(kernel) kernel = assign_automatic_axes(kernel) - kernel = add_boostability_and_automatic_dependencies(kernel) + kernel = find_boostability(kernel) kernel = limit_boostability(kernel) kernel = adjust_local_temp_var_storage(kernel) diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index 819de845d..2de1db43b 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -24,6 +24,7 @@ THE SOFTWARE. import numpy as np +import pyopencl as cl # noqa import loopy as lp from pyopencl.tools import ( # noqa -- GitLab