diff --git a/contrib/floopy-highlighting/floopy.vim b/contrib/floopy-highlighting/floopy.vim
index 59c5b15a431e3c28072c96afd824b6937f973b56..57c09a652c0cb9141d6764d300ebb3618577b05d 100644
--- a/contrib/floopy-highlighting/floopy.vim
+++ b/contrib/floopy-highlighting/floopy.vim
@@ -7,7 +7,7 @@
 " :set filetype=floopy
 "
 " You may also include a line
-" vim: filetype=pyopencl.python
+" vim: filetype=floopy.python
 " at the end of your file to set the file type automatically.
 "
 " Another option is to include the following in your .vimrc
@@ -16,24 +16,7 @@
 runtime! syntax/fortran.vim
 
 unlet b:current_syntax
-try
-  syntax include @clCode syntax/opencl.vim
-catch
-  syntax include @clCode syntax/c.vim
-endtry
-
-if exists('b:current_syntax')
-  let s:current_syntax=b:current_syntax
-  " Remove current syntax definition, as some syntax files (e.g. cpp.vim)
-  " do nothing if b:current_syntax is defined.
-  unlet b:current_syntax
-endif
-
 syntax include @LoopyPython syntax/python.vim
-try
-  syntax include @LoopyPython after/syntax/python.vim
-catch
-endtry
 
 if exists('s:current_syntax')
   let b:current_syntax=s:current_syntax
@@ -43,6 +26,6 @@ endif
 
 syntax region textSnipLoopyPython
 \ matchgroup=Comment
-\ start='$loopy begin transform' end='$loopy end transform'
+\ start='$loopy begin' end='$loopy end'
 \ containedin=ALL
 \ contains=@LoopyPython
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 1f64e59e6c9e8ab6d599ea0cc61937df5a60ffcb..f930df60c93b1be4df80bcfd2789c60dca1e3654 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -902,43 +902,54 @@ class LoopKernel(RecordWithoutPickling):
     def __str__(self):
         lines = []
 
+        from loopy.preprocess import add_default_dependencies
+        kernel = add_default_dependencies(self)
+
         sep = 75*"-"
         lines.append(sep)
-        lines.append("KERNEL: " + self.name)
+        lines.append("KERNEL: " + kernel.name)
         lines.append(sep)
         lines.append("ARGUMENTS:")
-        for arg_name in sorted(self.arg_dict):
-            lines.append(str(self.arg_dict[arg_name]))
+        for arg_name in sorted(kernel.arg_dict):
+            lines.append(str(kernel.arg_dict[arg_name]))
         lines.append(sep)
         lines.append("DOMAINS:")
-        for dom, parents in zip(self.domains, self.all_parents_per_domain()):
+        for dom, parents in zip(kernel.domains, kernel.all_parents_per_domain()):
             lines.append(len(parents)*" " + str(dom))
 
         lines.append(sep)
         lines.append("INAME IMPLEMENTATION TAGS:")
-        for iname in sorted(self.all_inames()):
-            line = "%s: %s" % (iname, self.iname_to_tag.get(iname))
+        for iname in sorted(kernel.all_inames()):
+            line = "%s: %s" % (iname, kernel.iname_to_tag.get(iname))
             lines.append(line)
 
-        if self.temporary_variables:
+        if kernel.temporary_variables:
             lines.append(sep)
             lines.append("TEMPORARIES:")
-            for tv in sorted(six.itervalues(self.temporary_variables),
+            for tv in sorted(six.itervalues(kernel.temporary_variables),
                     key=lambda tv: tv.name):
                 lines.append(str(tv))
 
-        if self.substitutions:
+        if kernel.substitutions:
             lines.append(sep)
             lines.append("SUBSTIUTION RULES:")
-            for rule_name in sorted(six.iterkeys(self.substitutions)):
-                lines.append(str(self.substitutions[rule_name]))
+            for rule_name in sorted(six.iterkeys(kernel.substitutions)):
+                lines.append(str(kernel.substitutions[rule_name]))
 
         lines.append(sep)
         lines.append("INSTRUCTIONS:")
         loop_list_width = 35
 
-        import loopy as lp
-        for insn in self.instructions:
+        printed_insn_ids = set()
+
+        def print_insn(insn):
+            if insn.id in printed_insn_ids:
+                return
+            printed_insn_ids.add(insn.id)
+
+            for dep_id in insn.insn_deps:
+                print_insn(kernel.id_to_insn[dep_id])
+
             if isinstance(insn, lp.ExpressionInstruction):
                 lhs = str(insn.assignee)
                 rhs = str(insn.expression)
@@ -952,7 +963,7 @@ class LoopKernel(RecordWithoutPickling):
 
                 trailing = [" "+l for l in insn.code.split("\n")]
 
-            loop_list = ",".join(sorted(self.insn_inames(insn)))
+            loop_list = ",".join(sorted(kernel.insn_inames(insn)))
 
             options = [insn.id]
             if insn.priority:
@@ -975,8 +986,12 @@ class LoopKernel(RecordWithoutPickling):
             if insn.predicates:
                 lines.append(10*" " + "if (%s)" % " && ".join(insn.predicates))
 
+        import loopy as lp
+        for insn in kernel.instructions:
+            print_insn(insn)
+
         dep_lines = []
-        for insn in self.instructions:
+        for insn in kernel.instructions:
             if insn.insn_deps:
                 dep_lines.append("%s : %s" % (insn.id, ",".join(insn.insn_deps)))
         if dep_lines:
@@ -987,10 +1002,10 @@ class LoopKernel(RecordWithoutPickling):
 
         lines.append(sep)
 
-        if self.schedule is not None:
+        if kernel.schedule is not None:
             lines.append("SCHEDULE:")
             from loopy.schedule import dump_schedule
-            lines.append(dump_schedule(self.schedule))
+            lines.append(dump_schedule(kernel.schedule))
             lines.append(sep)
 
         return "\n".join(lines)
diff --git a/loopy/precompute.py b/loopy/precompute.py
index 935d6d44040cf56b3ca3bdbbec26e22ec750a05d..726cc0786d44c59d5ba6179e6e69cc837a0b2992 100644
--- a/loopy/precompute.py
+++ b/loopy/precompute.py
@@ -132,7 +132,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
             access_descriptors, array_base_map,
             storage_axis_names, storage_axis_sources,
             non1_storage_axis_names,
-            target_var_name):
+            temporary_name):
         super(RuleInvocationReplacer, self).__init__(rule_mapping_context)
 
         self.subst_name = subst_name
@@ -146,7 +146,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
         self.storage_axis_sources = storage_axis_sources
         self.non1_storage_axis_names = non1_storage_axis_names
 
-        self.target_var_name = target_var_name
+        self.temporary_name = temporary_name
 
     def map_substitution(self, name, tag, arguments, expn_state):
         if not (
@@ -196,7 +196,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
                 ax_index = simplify_via_aff(ax_index - sax_base_idx)
                 stor_subscript.append(ax_index)
 
-        new_outer_expr = var(self.target_var_name)
+        new_outer_expr = var(self.temporary_name)
         if stor_subscript:
             new_outer_expr = new_outer_expr.index(tuple(stor_subscript))
 
@@ -210,9 +210,9 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
 
 
 def precompute(kernel, subst_use, sweep_inames=[], within=None,
-        storage_axes=None, precompute_inames=None, storage_axis_to_tag={},
-        default_tag="l.auto", dtype=None, fetch_bounding_box=False,
-        temporary_is_local=None):
+        storage_axes=None, temporary_name=None, precompute_inames=None,
+        storage_axis_to_tag={}, default_tag="l.auto", dtype=None,
+        fetch_bounding_box=False, temporary_is_local=None):
     """Precompute the expression described in the substitution rule determined by
     *subst_use* and store it in a temporary array. A precomputation needs two
     things to operate, a list of *sweep_inames* (order irrelevant) and an
@@ -263,6 +263,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
         May also equivalently be a comma-separated string.
     :arg within: a stack match as understood by
         :func:`loopy.context_matching.parse_stack_match`.
+    :arg temporary_name:
+        The temporary variable name to use for storing the precomputed data.
+        If it does not exist, it will be created. If it does exist, its properties
+        (such as size, type) are checked (and updated, if possible) to match
+        its use.
     :arg precompute_inames:
         If the specified inames do not already exist, they will be created.
         If they do already exist, their loop domain is verified
@@ -584,8 +589,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
 
     # {{{ set up compute insn
 
-    target_var_name = var_name_gen(based_on=c_subst_name)
-    assignee = var(target_var_name)
+    if temporary_name is None:
+        temporary_name = var_name_gen(based_on=c_subst_name)
+
+    assignee = var(temporary_name)
 
     if non1_storage_axis_names:
         assignee = assignee.index(
@@ -633,7 +640,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
             access_descriptors, abm,
             storage_axis_names, storage_axis_sources,
             non1_storage_axis_names,
-            target_var_name)
+            temporary_name)
 
     kernel = invr.map_kernel(kernel)
     kernel = kernel.copy(
@@ -655,15 +662,69 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     if temporary_is_local is None:
         temporary_is_local = lp.auto
 
+    new_temp_shape = tuple(abm.non1_storage_shape)
+
     new_temporary_variables = kernel.temporary_variables.copy()
-    temp_var = lp.TemporaryVariable(
-            name=target_var_name,
-            dtype=dtype,
-            base_indices=(0,)*len(abm.non1_storage_shape),
-            shape=tuple(abm.non1_storage_shape),
-            is_local=temporary_is_local)
-
-    new_temporary_variables[target_var_name] = temp_var
+    if temporary_name not in new_temporary_variables:
+        temp_var = lp.TemporaryVariable(
+                name=temporary_name,
+                dtype=dtype,
+                base_indices=(0,)*len(new_temp_shape),
+                shape=tuple(abm.non1_storage_shape),
+                is_local=temporary_is_local)
+
+    else:
+        temp_var = new_temporary_variables[temporary_name]
+
+        # {{{ check and adapt existing temporary
+
+        if temp_var.dtype is lp.auto:
+            pass
+        elif temp_var.dtype is not lp.auto and dtype is lp.auto:
+            dtype = temp_var.dtype
+        elif temp_var.dtype is not lp.auto and dtype is not lp.auto:
+            if temp_var.dtype != dtype:
+                raise LoopyError("Existing and new dtype of temporary '%s' "
+                        "do not match (existing: %s, new: %s)"
+                        % (temporary_name, temp_var.dtype, dtype))
+
+        temp_var = temp_var.copy(dtype=dtype)
+
+        if len(temp_var.shape) != len(new_temp_shape):
+            raise LoopyError("Existing and new temporary '%s' do not have "
+                    "matching number of dimensions (existing: %d, new: %d)"
+                    % (temporary_name,
+                        len(temp_var.shape), len(new_temp_shape)))
+
+        if temp_var.base_indices != (0,) * len(new_temp_shape):
+            raise LoopyError("Existing temporary '%s' does not have "
+                    "zero base indices (existing: %s, expected: %s)"
+                    % (temporary_name,
+                        temp_var.base_indices, (0,) * len(new_temp_shape)))
+
+        new_temp_shape = tuple(
+                max(i, ex_i)
+                for i, ex_i in zip(new_temp_shape, temp_var.shape))
+
+        temp_var = temp_var.copy(shape=new_temp_shape)
+
+        if temporary_is_local == temp_var.is_local:
+            pass
+        elif temporary_is_local is lp.auto:
+            temporary_is_local = temp_var.is_local
+        elif temp_var.is_local is lp.auto:
+            pass
+        else:
+            raise LoopyError("Existing and new temporary '%s' do not have "
+                    "matching values of 'is_local' (existing: %s, new: %s)"
+                    % (temporary_name,
+                        temp_var.is_local, temporary_is_local))
+
+        temp_var = temp_var.copy(is_local=temporary_is_local)
+
+        # }}}
+
+    new_temporary_variables[temporary_name] = temp_var
 
     kernel = kernel.copy(
             temporary_variables=new_temporary_variables)