diff --git a/loopy/__init__.py b/loopy/__init__.py index 5ae1a1291a956841f3ad7683757135b052e77ade..dc4e7bf322d43bfc36c4c51ee7e2df564f8f6c70 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -51,7 +51,7 @@ from loopy.library.symbol import opencl_symbol_mangler from loopy.kernel.data import ( ValueArg, GlobalArg, ConstantArg, ImageArg, - Instruction) + ExpressionInstruction, CInstruction) from loopy.kernel import LoopKernel from loopy.kernel.tools import ( @@ -72,9 +72,12 @@ from loopy.auto_test import auto_test_vs_ref __all__ = [ "auto", - "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", + "LoopKernel", - "Instruction", + + "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", + "ExpressionInstruction", "CInstruction", + "default_function_mangler", "single_arg_function_mangler", "opencl_function_mangler", "opencl_symbol_mangler", "default_preamble_generator", diff --git a/loopy/check.py b/loopy/check.py index dff150f2434b3063f357dbfc547d1b1211ec8e3a..b48b39a8b828d4ba188c02a77079ba10ba3080e8 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -161,68 +161,68 @@ def check_for_write_races(kernel): iname_to_tag = kernel.iname_to_tag.get for insn in kernel.instructions: - assignee_name = insn.get_assignee_var_name() - assignee_indices = depmap(insn.get_assignee_indices()) + for assignee_name, assignee_indices in insn.assignees_and_indices(): + assignee_indices = depmap(assignee_indices) - def strip_var(expr): - from pymbolic.primitives import Variable - assert isinstance(expr, Variable) - return expr.name + def strip_var(expr): + from pymbolic.primitives import Variable + assert isinstance(expr, Variable) + return expr.name - assignee_indices = set(strip_var(index) for index in assignee_indices) + assignee_indices = set(strip_var(index) for index in assignee_indices) - assignee_inames = assignee_indices & kernel.all_inames() - if not assignee_inames <= kernel.insn_inames(insn): - raise RuntimeError( - "assignee of instructiosn '%s' references " - "iname that the instruction does not depend on" - % insn.id) - - if assignee_name in kernel.arg_dict: - # Any parallel tags that are not depended upon by the assignee - # will cause write races. + assignee_inames = assignee_indices & kernel.all_inames() + if not assignee_inames <= kernel.insn_inames(insn): + raise RuntimeError( + "assignee of instructiosn '%s' references " + "iname that the instruction does not depend on" + % insn.id) - raceable_parallel_insn_inames = set( - iname - for iname in kernel.insn_inames(insn) - if isinstance(iname_to_tag(iname), ParallelTag)) - - elif assignee_name in kernel.temporary_variables: - temp_var = kernel.temporary_variables[assignee_name] - if temp_var.is_local is True: - raceable_parallel_insn_inames = set( - iname - for iname in kernel.insn_inames(insn) - if isinstance(iname_to_tag(iname), ParallelTag) - and not isinstance(iname_to_tag(iname), GroupIndexTag)) + if assignee_name in kernel.arg_dict: + # Any parallel tags that are not depended upon by the assignee + # will cause write races. - elif temp_var.is_local is False: raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if isinstance(iname_to_tag(iname), ParallelTag) - and not isinstance(iname_to_tag(iname), - GroupIndexTag) - and not isinstance(iname_to_tag(iname), - LocalIndexTagBase)) + if isinstance(iname_to_tag(iname), ParallelTag)) + + elif assignee_name in kernel.temporary_variables: + temp_var = kernel.temporary_variables[assignee_name] + if temp_var.is_local is True: + raceable_parallel_insn_inames = set( + iname + for iname in kernel.insn_inames(insn) + if isinstance(iname_to_tag(iname), ParallelTag) + and not isinstance(iname_to_tag(iname), GroupIndexTag)) + + elif temp_var.is_local is False: + raceable_parallel_insn_inames = set( + iname + for iname in kernel.insn_inames(insn) + if isinstance(iname_to_tag(iname), ParallelTag) + and not isinstance(iname_to_tag(iname), + GroupIndexTag) + and not isinstance(iname_to_tag(iname), + LocalIndexTagBase)) + + else: + raise RuntimeError("temp var '%s' hasn't decided on " + "whether it is local" % temp_var.name) else: - raise RuntimeError("temp var '%s' hasn't decided on " - "whether it is local" % temp_var.name) + raise RuntimeError("invalid assignee name in instruction '%s'" + % insn.id) - else: - raise RuntimeError("invalid assignee name in instruction '%s'" - % insn.id) + race_inames = \ + raceable_parallel_insn_inames - assignee_inames - race_inames = \ - raceable_parallel_insn_inames - assignee_inames - - if race_inames: - raise WriteRaceConditionError( - "instruction '%s' contains a write race: " - "instruction will be run across parallel iname(s) '%s', which " - "is/are not referenced in the lhs index" - % (insn.id, ",".join(race_inames))) + if race_inames: + raise WriteRaceConditionError( + "instruction '%s' contains a write race: " + "instruction will be run across parallel iname(s) " + "'%s', which is/are not referenced in the lhs index" + % (insn.id, ",".join(race_inames))) def check_for_orphaned_user_hardware_axes(kernel): @@ -337,26 +337,25 @@ def check_bounds(kernel): def check_write_destinations(kernel): for insn in kernel.instructions: - wvar = insn.get_assignee_var_name() - - if wvar in kernel.all_inames(): - raise RuntimeError("iname '%s' may not be written" % wvar) + for wvar, _ in insn.assignees_and_indices(): + if wvar in kernel.all_inames(): + raise RuntimeError("iname '%s' may not be written" % wvar) - insn_domain = kernel.get_inames_domain(kernel.insn_inames(insn)) - insn_params = set(insn_domain.get_var_names(dim_type.param)) + insn_domain = kernel.get_inames_domain(kernel.insn_inames(insn)) + insn_params = set(insn_domain.get_var_names(dim_type.param)) - if wvar in kernel.all_params(): - if wvar not in kernel.temporary_variables: - raise RuntimeError("domain parameter '%s' may not be written" - "--it is not a temporary variable" % wvar) + if wvar in kernel.all_params(): + if wvar not in kernel.temporary_variables: + raise RuntimeError("domain parameter '%s' may not be written" + "--it is not a temporary variable" % wvar) - if wvar in insn_params: - raise RuntimeError("domain parameter '%s' may not be written " - "inside a domain dependent on it" % wvar) + if wvar in insn_params: + raise RuntimeError("domain parameter '%s' may not be written " + "inside a domain dependent on it" % wvar) - if not (wvar in kernel.temporary_variables - or wvar in kernel.arg_dict) and wvar not in kernel.all_params(): - raise RuntimeError + if not (wvar in kernel.temporary_variables + or wvar in kernel.arg_dict) and wvar not in kernel.all_params(): + raise RuntimeError # }}} diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index ab603aa6f6fd334a851ff3a58bb29af52b2ccafb..5015d6234725e88a8489684fef8acc76c021d10f 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -49,13 +49,25 @@ def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt): def generate_instruction_code(kernel, insn, codegen_state): + from loopy.kernel.data import ExpressionInstruction, CInstruction + + if isinstance(insn, ExpressionInstruction): + return generate_expr_instruction_code(kernel, insn, codegen_state) + elif isinstance(insn, CInstruction): + return generate_c_instruction_code(kernel, insn, codegen_state) + else: + raise RuntimeError("unexpected instruction type") + + +def generate_expr_instruction_code(kernel, insn, codegen_state): from loopy.codegen import GeneratedInstruction ccm = codegen_state.c_code_mapper expr = insn.expression - target_dtype = kernel.get_var_descriptor(insn.get_assignee_var_name()).dtype + (assignee_var_name, assignee_indices), = insn.assignees_and_indices() + target_dtype = kernel.get_var_descriptor(assignee_var_name).dtype from cgen import Assign from loopy.codegen.expression import dtype_to_type_context @@ -78,17 +90,16 @@ def generate_instruction_code(kernel, insn, codegen_state): if 0: from loopy.codegen import gen_code_block from cgen import Statement as S - idx = insn.get_assignee_indices() - if idx: + if assignee_indices: result = gen_code_block([ GeneratedInstruction( ast=S(r'printf("write %s[%s]\n", %s);' - % (insn.get_assignee_var_name(), - ",".join(len(idx) * ["%d"]), + % (assignee_var_name, + ",".join(len(assignee_indices) * ["%d"]), ",".join( ccm(i, prec=None, type_context="i") - for i in idx))), + for i in assignee_indices))), implemented_domain=None), result ]) @@ -96,4 +107,7 @@ def generate_instruction_code(kernel, insn, codegen_state): return result +def generate_c_instruction_code(kernel, insn, codegen_state): + raise NotImplementedError + # vim: foldmethod=marker diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 30566942def8b13db3a844b3f7f0ccee8a5d8e4e..68dac0daede7cecf2d1247d794d9029c556e0446 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -579,8 +579,8 @@ class LoopKernel(Record): return result def insn_inames(self, insn): - from loopy.kernel.data import Instruction - if isinstance(insn, Instruction): + from loopy.kernel.data import InstructionBase + if isinstance(insn, InstructionBase): return self.all_insn_inames()[insn.id] else: return self.all_insn_inames()[insn] @@ -612,7 +612,7 @@ class LoopKernel(Record): | set(self.temporary_variables.iterkeys())) for insn in self.instructions: - for var_name in insn.get_read_var_names() & admissible_vars: + for var_name in insn.read_dependency_names() & admissible_vars: result.setdefault(var_name, set()).add(insn.id) @memoize_method @@ -624,10 +624,7 @@ class LoopKernel(Record): result = {} for insn in self.instructions: - var_name = insn.get_assignee_var_name() - var_names = [var_name] - - for var_name in var_names: + for var_name, _ in insn.assignees_and_indices(): result.setdefault(var_name, set()).add(insn.id) return result @@ -636,14 +633,15 @@ class LoopKernel(Record): def get_read_variables(self): result = set() for insn in self.instructions: - result.update(insn.get_read_var_names()) + result.update(insn.read_dependency_names()) return result @memoize_method def get_written_variables(self): return frozenset( - insn.get_assignee_var_name() - for insn in self.instructions) + var_name + for insn in self.instructions + for var_name, _ in insn.assignees_and_indices()) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 0bf9bb548f7c5f6cf688eea6f1aa4d2bdaba1139..f4cd45c27444a2a33c3d0ac8d9edda5f392e35e6 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -27,7 +27,8 @@ THE SOFTWARE. import numpy as np from loopy.symbolic import IdentityMapper, WalkMapper -from loopy.kernel.data import Instruction, SubstitutionRule +from loopy.kernel.data import ( + InstructionBase, ExpressionInstruction, SubstitutionRule) import islpy as isl from islpy import dim_type @@ -204,7 +205,7 @@ def parse_insn(insn): raise RuntimeError("left hand side of assignment '%s' must " "be variable or subscript" % lhs) - return Instruction( + return ExpressionInstruction( id=insn_id, insn_deps=insn_deps, forced_iname_deps=frozenset(), @@ -240,7 +241,7 @@ def parse_insn(insn): def parse_if_necessary(insn, defines): - if isinstance(insn, Instruction): + if isinstance(insn, InstructionBase): yield insn return elif not isinstance(insn, str): @@ -398,7 +399,8 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, for insn in instructions: if insn.temp_var_type is not None: - temp_var_names.add(insn.get_assignee_var_name()) + (assignee_var_name, _), = insn.assignees_and_indices() + temp_var_names.add(assignee_var_name) # }}} @@ -414,9 +416,11 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, all_written_names = set() from loopy.symbolic import get_dependencies for insn in instructions: - all_written_names.add(insn.get_assignee_var_name()) - all_names.update(get_dependencies(submap(insn.assignee, insn.id))) - all_names.update(get_dependencies(submap(insn.expression, insn.id))) + if isinstance(insn, ExpressionInstruction): + (assignee_var_name, _), = insn.assignees_and_indices() + all_written_names.add(assignee_var_name) + all_names.update(get_dependencies(submap(insn.assignee, insn.id))) + all_names.update(get_dependencies(submap(insn.expression, insn.id))) all_params = set() for dom in domains: @@ -558,11 +562,10 @@ def check_written_variable_names(knl): | set(knl.temporary_variables.iterkeys())) for insn in knl.instructions: - var_name = insn.get_assignee_var_name() - - if var_name not in admissible_vars: - raise RuntimeError("variable '%s' not declared or not " - "allowed for writing" % var_name) + for var_name, _ in insn.assignees_and_indices(): + if var_name not in admissible_vars: + raise RuntimeError("variable '%s' not declared or not " + "allowed for writing" % var_name) # }}} @@ -616,7 +619,7 @@ def expand_cses(knl): shape=()) from pymbolic.primitives import Variable - insn = Instruction( + insn = ExpressionInstruction( id=knl.make_unique_instruction_id( extra_used_ids=newly_created_insn_ids), assignee=Variable(new_var_name), expression=expr) @@ -635,7 +638,10 @@ def expand_cses(knl): new_temp_vars = knl.temporary_variables.copy() for insn in knl.instructions: - new_insns.append(insn.copy(expression=cseam(insn.expression))) + if isinstance(insn, ExpressionInstruction): + new_insns.append(insn.copy(expression=cseam(insn.expression))) + else: + new_insns.append(insn) return knl.copy( instructions=new_insns, @@ -653,10 +659,13 @@ def create_temporaries(knl): from loopy.symbolic import AccessRangeMapper for insn in knl.instructions: + if not isinstance(insn, ExpressionInstruction): + continue + from loopy.kernel.data import TemporaryVariable if insn.temp_var_type is not None: - assignee_name = insn.get_assignee_var_name() + (assignee_name, _), = insn.assignees_and_indices() armap = AccessRangeMapper(knl, assignee_name) armap(insn.assignee, knl.insn_inames(insn)) @@ -880,7 +889,7 @@ def make_kernel(device, domains, instructions, kernel_args=["..."], **kwargs): instructions = [instructions] for insn in instructions: for new_insn in parse_if_necessary(insn, defines): - if isinstance(new_insn, Instruction): + if isinstance(new_insn, InstructionBase): parsed_instructions.append(new_insn) elif isinstance(new_insn, SubstitutionRule): substitutions[new_insn.name] = new_insn diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9de7f133edc7a96b7cc91186b7bbd48f25464eef..c0dcd036fbab2af64404e0b225a1b3b19b30f934 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -304,27 +304,29 @@ class SubstitutionRule(Record): # {{{ instruction -class Instruction(Record): +class InstructionBase(Record): """ .. attribute:: id An (otherwise meaningless) identifier that is unique within - a :class:`LoopKernel`. + a :class:`loopy.kernel.LoopKernel`. - .. attribute:: assignee + .. attribute:: insn_deps - .. attribute:: expression + a list of ids of :class:`Instruction` instances that + *must* be executed before this one. Note that loop scheduling augments this + by adding dependencies on any writes to temporaries read by this instruction. .. attribute:: forced_iname_deps - a set of inames that are added to the list of iname - dependencies + A :class:`frozenset` of inames that are added to the list of iname + dependencies. - .. attribute:: insn_deps + .. attribute:: priority + + Scheduling priority, an integer. Higher means 'execute sooner'. + Default 0. - a list of ids of :class:`Instruction` instances that - *must* be executed before this one. Note that loop scheduling augments this - by adding dependencies on any writes to temporaries read by this instruction. .. attribute:: boostable Whether the instruction may safely be executed inside more loops than @@ -333,10 +335,72 @@ class Instruction(Record): .. attribute:: boostable_into - a set of inames into which the instruction + A :class:`set` of inames into which the instruction may need to be boosted, as a heuristic help for the scheduler. + Also allowed to be *None*. + """ + + fields = set("id insn_deps forced_iname_deps " + "priority boostable boostable_into".split()) + + def __init__(self, id, insn_deps, forced_iname_deps, priority, + boostable, boostable_into): + + assert isinstance(forced_iname_deps, frozenset) + assert isinstance(insn_deps, set) + + Record.__init__(self, + id=id, + insn_deps=insn_deps, + forced_iname_deps=forced_iname_deps, + priority=priority, + boostable=boostable, + boostable_into=boostable_into) + + # {{{ abstract interface + + def read_dependency_names(self): + raise NotImplementedError + + def reduction_inames(self): + raise NotImplementedError - .. attribute:: priority: scheduling priority + def assignees_and_indices(self): + """Return a list of tuples *(assignee_var_name, subscript)* + where assignee_var_name is a string representing an assigned + variable name and subscript is a :class:`tuple`. + """ + raise NotImplementedError + + # }}} + + @memoize_method + def write_dependency_names(self): + """Return a set of dependencies of the left hand side of the + assignments performed by this instruction, including written variables + and indices. + """ + + result = set() + for assignee, indices in self.assignees_and_indices(): + result.add(assignee) + from loopy.symbolic import get_dependencies + result.update(get_dependencies(indices)) + + return result + + def dependency_names(self): + return self.read_dependency_names() | self.write_dependency_names() + + def assignee_var_names(self): + return (var_name for var_name, _ in self.assignees_and_indices()) + + +class ExpressionInstruction(InstructionBase): + """ + .. attribute:: assignee + + .. attribute:: expression The following instance variables are only used until :func:`loopy.make_kernel` is finished: @@ -347,28 +411,38 @@ class Instruction(Record): created from the assignee """ + fields = InstructionBase.fields | \ + set("assignee expression temp_var_type".split()) + def __init__(self, id, assignee, expression, forced_iname_deps=frozenset(), insn_deps=set(), boostable=None, boostable_into=None, temp_var_type=None, priority=0): + InstructionBase.__init__(self, + id=id, + forced_iname_deps=forced_iname_deps, + insn_deps=insn_deps, boostable=boostable, + boostable_into=boostable_into, + priority=priority) + from loopy.symbolic import parse if isinstance(assignee, str): assignee = parse(assignee) if isinstance(expression, str): assignee = parse(expression) - assert isinstance(forced_iname_deps, frozenset) - assert isinstance(insn_deps, set) + self.assignee = assignee + self.expression = expression + self.temp_var_type = temp_var_type - Record.__init__(self, - id=id, assignee=assignee, expression=expression, - forced_iname_deps=forced_iname_deps, - insn_deps=insn_deps, boostable=boostable, - boostable_into=boostable_into, - temp_var_type=temp_var_type, - priority=priority) + # {{{ implement InstructionBase interface + + @memoize_method + def read_dependency_names(self): + from loopy.symbolic import get_dependencies + return get_dependencies(self.expression) @memoize_method def reduction_inames(self): @@ -385,6 +459,29 @@ class Instruction(Record): return result + @memoize_method + def assignees_and_indices(self): + from pymbolic.primitives import Variable, Subscript + + if isinstance(self.assignee, Variable): + return [(self.assignee.name, ())] + elif isinstance(self.assignee, Subscript): + agg = self.assignee.aggregate + assert isinstance(agg, Variable) + var_name = agg.name + + idx = self.assignee.index + if not isinstance(idx, tuple): + idx = (idx,) + + return [(agg.name, idx)] + else: + raise RuntimeError("invalid lvalue '%s'" % self.assignee) + + return var_name + + # }}} + def __str__(self): result = "%s: %s <- %s" % (self.id, self.assignee, self.expression) @@ -410,39 +507,9 @@ class Instruction(Record): return result - @memoize_method - def get_assignee_var_name(self): - from pymbolic.primitives import Variable, Subscript - - if isinstance(self.assignee, Variable): - var_name = self.assignee.name - elif isinstance(self.assignee, Subscript): - agg = self.assignee.aggregate - assert isinstance(agg, Variable) - var_name = agg.name - else: - raise RuntimeError("invalid lvalue '%s'" % self.assignee) - - return var_name - @memoize_method - def get_assignee_indices(self): - from pymbolic.primitives import Variable, Subscript - - if isinstance(self.assignee, Variable): - return () - elif isinstance(self.assignee, Subscript): - result = self.assignee.index - if not isinstance(result, tuple): - result = (result,) - return result - else: - raise RuntimeError("invalid lvalue '%s'" % self.assignee) - - @memoize_method - def get_read_var_names(self): - from loopy.symbolic import get_dependencies - return get_dependencies(self.expression) +class CInstruction(InstructionBase): + pass # }}} diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index d9b8169abc728ff9197b9d0416d2e14ff57548b8..51cc266db86da54764c6dfea1207e406f7b182a1 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -79,8 +79,6 @@ def add_and_infer_argument_dtypes(knl, dtype_dict): # {{{ find_all_insn_inames fixed point iteration def find_all_insn_inames(kernel): - from loopy.symbolic import get_dependencies - writer_map = kernel.writer_map() insn_id_to_inames = {} @@ -93,8 +91,8 @@ def find_all_insn_inames(kernel): kernel = expand_subst(kernel) for insn in kernel.instructions: - all_read_deps[insn.id] = read_deps = get_dependencies(insn.expression) - all_write_deps[insn.id] = write_deps = get_dependencies(insn.assignee) + all_read_deps[insn.id] = read_deps = insn.read_dependency_names() + all_write_deps[insn.id] = write_deps = insn.write_dependency_names() deps = read_deps | write_deps iname_deps = ( diff --git a/loopy/maxima.py b/loopy/maxima.py index 51c7c090b3af8f6c7ee144f6423037f4947c93ee..957f8e04988153a5a61da6d6ba55cb3f51e562f2 100644 --- a/loopy/maxima.py +++ b/loopy/maxima.py @@ -58,7 +58,10 @@ def get_loopy_instructions_as_maxima(kernel, prefix): kernel = add_boostability_and_automatic_dependencies(kernel) my_variable_names = ( - insn.get_assignee_var_name() for insn in kernel.instructions) + avn + for insn in kernel.instructions + for avn, _ in insn.assignees_and_indices() + ) from pymbolic import var subst_dict = dict( @@ -73,18 +76,22 @@ def get_loopy_instructions_as_maxima(kernel, prefix): written_insn_ids = set() - from loopy.kernel import Instruction + from loopy.kernel import InstructionBase, ExpressionInstruction def write_insn(insn): - if not isinstance(insn, Instruction): + if not isinstance(insn, InstructionBase): insn = kernel.id_to_insn[insn] + if not isinstance(insn, ExpressionInstruction): + raise RuntimeError("non-expression instructions not supported " + "in maxima export") for dep in insn.insn_deps: if dep not in written_insn_ids: write_insn(dep) + (aname, _), = insn.assignees_and_indices() result.append("%s%s : %s;" % ( - prefix, insn.get_assignee_var_name(), + prefix, aname, mstr(substitute(insn.expression)))) written_insn_ids.add(insn.id) diff --git a/loopy/precompute.py b/loopy/precompute.py index 6d9cc491308d220faee337fe4344799b51754a33..1c6634b13d108e0864395a37e2812c705e4068cf 100644 --- a/loopy/precompute.py +++ b/loopy/precompute.py @@ -829,8 +829,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, ))) (compute_expr)) - from loopy.kernel.data import Instruction - compute_insn = Instruction( + from loopy.kernel.data import ExpressionInstruction + compute_insn = ExpressionInstruction( id=kernel.make_unique_instruction_id(based_on=c_subst_name), assignee=assignee, expression=compute_expr) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index fb33c152cf08bc217c4011c20c2f4ccc5b9642fa..f71d98bbaf13067bd6be31aee5868ff627f80bee 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -216,8 +216,8 @@ def mark_local_temporaries(kernel): if isinstance(kernel.iname_to_tag.get(iname), LocalIndexTagBase)) locparallel_assignee_inames = set(iname - for iname in - get_dependencies(insn.get_assignee_indices()) + for _, assignee_indices in insn.assignees_and_indices() + for iname in get_dependencies(assignee_indices) & kernel.all_inames() if isinstance(kernel.iname_to_tag.get(iname), LocalIndexTagBase)) @@ -295,7 +295,7 @@ def realize_reduction(kernel, insn_id_filter=None): arg_dtype = type_inf_mapper(expr.expr) - from loopy.kernel.data import Instruction, TemporaryVariable + from loopy.kernel.data import ExpressionInstruction, TemporaryVariable new_temporary_variables[target_var_name] = TemporaryVariable( name=target_var_name, @@ -313,7 +313,7 @@ def realize_reduction(kernel, insn_id_filter=None): based_on="%s_%s_init" % (insn.id, "_".join(expr.inames)), extra_used_ids=set(i.id for i in generated_insns)) - init_insn = Instruction( + init_insn = ExpressionInstruction( id=new_id, assignee=target_var, forced_iname_deps=outer_insn_inames - set(expr.inames), @@ -325,7 +325,7 @@ def realize_reduction(kernel, insn_id_filter=None): based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)), extra_used_ids=set(i.id for i in generated_insns)) - reduction_insn = Instruction( + reduction_insn = ExpressionInstruction( id=new_id, assignee=target_var, expression=expr.operation( @@ -523,14 +523,9 @@ def add_boostability_and_automatic_dependencies(kernel): var_names = arg_names | set(kernel.temporary_variables.iterkeys()) - from loopy.symbolic import DependencyMapper - dm = DependencyMapper(composite_leaves=False) - dep_map = {} - - for insn in kernel.instructions: - dep_map[insn.id] = ( - set(var.name for var in dm(insn.expression)) - & var_names) + dep_map = dict( + (insn.id, insn.read_dependency_names() & var_names) + for insn in kernel.instructions) non_boostable_vars = set() @@ -579,7 +574,8 @@ def add_boostability_and_automatic_dependencies(kernel): boostable = insn.id not in all_my_var_writers if not boostable: - non_boostable_vars.add(insn.get_assignee_var_name()) + non_boostable_vars.update( + var_name for var_name, _ in insn.assignees_and_indices()) new_insns.append( insn.copy( @@ -590,10 +586,7 @@ def add_boostability_and_automatic_dependencies(kernel): new2_insns = [] for insn in new_insns: - accessed_vars = ( - set([insn.get_assignee_var_name()]) - | insn.get_read_var_names()) - + accessed_vars = insn.dependency_names() boostable = insn.boostable and not bool(non_boostable_vars & accessed_vars) new2_insns.append(insn.copy(boostable=boostable)) diff --git a/loopy/schedule.py b/loopy/schedule.py index 75fa263fbeaf1280e5e06f114f38db10610f48d6..c60505204386585d06ba82d28fbd9e03b26c51fb 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -77,19 +77,19 @@ def gather_schedule_subloop(schedule, start_idx): def get_barrier_needing_dependency(kernel, target, source, unordered=False): - from loopy.kernel.data import Instruction - if not isinstance(source, Instruction): + from loopy.kernel.data import InstructionBase + if not isinstance(source, InstructionBase): source = kernel.id_to_insn[source] - if not isinstance(target, Instruction): + if not isinstance(target, InstructionBase): target = kernel.id_to_insn[target] local_vars = kernel.local_var_names() - tgt_write = set([target.get_assignee_var_name()]) & local_vars - tgt_read = target.get_read_var_names() & local_vars + tgt_write = set(target.assignee_var_names()) & local_vars + tgt_read = target.read_dependency_names() & local_vars - src_write = set([source.get_assignee_var_name()]) & local_vars - src_read = source.get_read_var_names() & local_vars + src_write = set(source.assignee_var_names()) & local_vars + src_read = source.read_dependency_names() & local_vars waw = tgt_write & src_write raw = tgt_read & src_write @@ -764,19 +764,19 @@ def insert_barriers(kernel, schedule, level=0): # }}} - assignee_temp_var = kernel.temporary_variables.get( - insn.get_assignee_var_name()) - if assignee_temp_var is not None and assignee_temp_var.is_local: - dep = get_barrier_dependent_in_schedule(kernel, insn.id, schedule, - unordered=True) + for assignee_name in insn.assignee_var_names(): + assignee_temp_var = kernel.temporary_variables.get( + assignee_name) + if assignee_temp_var is not None and assignee_temp_var.is_local: + dep = get_barrier_dependent_in_schedule( + kernel, insn.id, schedule, + unordered=True) - if dep: - issue_barrier(is_pre_barrier=True, dep=dep) + if dep: + issue_barrier(is_pre_barrier=True, dep=dep) - result.append(sched_item) - owed_barriers.add(insn.id) - else: - result.append(sched_item) + owed_barriers.add(insn.id) + result.append(sched_item) else: assert False diff --git a/test/test_loopy.py b/test/test_loopy.py index 29a46651e4a55d17d4bb5b2a34663fa216240aee..5407fea583ef1360923f34fbe428e96e8bd4e97d 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -481,7 +481,7 @@ def test_fuzz_code_generator(ctx_factory): return np.float64 knl = lp.make_kernel(ctx.devices[0], "{ : }", - [lp.Instruction(None, "value", expr)], + [lp.ExpressionInstruction(None, "value", expr)], [lp.GlobalArg("value", np.complex128, shape=())] + [ lp.ValueArg(name, get_dtype(val)) @@ -615,7 +615,7 @@ def test_recursive_nested_dependent_reduction(ctx_factory): assumptions="ntgts>=1") cknl = lp.CompiledKernel(ctx, knl) - print cknl.code + print cknl.get_code() # FIXME: Actually test functionality.