diff --git a/loopy/__init__.py b/loopy/__init__.py index 6d64589c8940b0e24e77e7210c3b95a270e54fe4..4422f18f42acb072f5d6e1e66f295996f7a2822c 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -164,12 +164,6 @@ def make_kernel(*args, **kwargs): assignee_indices.append(index_expr.name) - from loopy.kernel import LocalIndexTagBase - from pytools import any - is_local = any( - isinstance(new_iname_to_tag.get(iname), LocalIndexTagBase) - for iname in assignee_indices) - base_indices, shape = \ find_var_base_indices_and_shape_from_inames( new_domain, assignee_indices) @@ -177,7 +171,7 @@ def make_kernel(*args, **kwargs): new_temp_vars[assignee_name] = TemporaryVariable( name=assignee_name, dtype=np.dtype(insn.temp_var_type), - is_local=is_local, + is_local=None, base_indices=base_indices, shape=shape) diff --git a/loopy/check.py b/loopy/check.py index 9903ec70e83edb4bc4bef6bba261c6d119a29be6..fb52b8f15b635ef3eca9279e72643a5f51019091 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -120,7 +120,7 @@ def check_for_write_races(kernel): elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] - if temp_var.is_local: + if temp_var.is_local == True: local_parallel_insn_inames = set( iname for iname in insn.all_inames() @@ -130,7 +130,7 @@ def check_for_write_races(kernel): inames_without_write_dep = local_parallel_insn_inames - ( assignee_inames & local_parallel_insn_inames) - else: + elif temp_var.is_local == False: ilp_inames = set( iname for iname in insn.all_inames() @@ -139,6 +139,10 @@ def check_for_write_races(kernel): inames_without_write_dep = ilp_inames - ( assignee_inames & ilp_inames) + else: + raise RuntimeError("temp var '%s' hasn't decided on " + "whether it is local" % temp_var.name) + else: raise RuntimeError("invalid assignee name in instruction '%s'" diff --git a/loopy/cse.py b/loopy/cse.py index 14980b8235852d02d618e3606b63bb611cc86771..4fe1e974850e52844eae239a0cea58639f110e81 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -11,8 +11,7 @@ from pymbolic import var -def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies, - target_var_is_local, cse): +def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies, cse): from loopy.kernel import (LocalIndexTagBase, GroupIndexTag, IlpTag) if isinstance(tag, LocalIndexTagBase): @@ -25,10 +24,7 @@ def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies, kind = "o" if iname not in duplicate_inames and iname in dependencies: - if ( - (target_var_is_local and kind in "li") - or - (not target_var_is_local and kind in "i")): + if kind == "i": raise RuntimeError( "When realizing CSE with tag '%s', encountered iname " "'%s' which is depended upon by the CSE and tagged " @@ -41,14 +37,10 @@ def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies, if iname in duplicate_inames and kind == "g": raise RuntimeError("duplicating the iname '%s' into " "group index axes is not helpful, as they cannot " - "collaborate in computing a local variable" + "collaborate in computing a local/private variable" %iname) if iname in dependencies: - if not target_var_is_local and iname in duplicate_inames and kind == "l": - raise RuntimeError("invalid: hardware-parallelized " - "fetch into private variable") - return # the iname is *not* a dependency of the fetch expression @@ -277,7 +269,7 @@ def process_cses(kernel, lead_csed, cse_descriptors): -def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local, +def make_compute_insn(kernel, lead_csed, target_var_name, independent_inames, new_inames, ind_iname_to_tag): insn = lead_csed.insn @@ -299,8 +291,7 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local, tag = kernel.iname_to_tag.get(iname) check_cse_iname_deps( - iname, independent_inames, tag, dependencies, - target_var_is_local, lead_csed.cse) + iname, independent_inames, tag, dependencies, lead_csed.cse) # }}} @@ -450,11 +441,6 @@ def realize_cse(kernel, cse_tag, dtype, independent_inames=[], var_base = "cse" target_var_name = kernel.make_unique_var_name(var_base) - from loopy.kernel import LocalIndexTagBase - target_var_is_local = any( - isinstance(tag, LocalIndexTagBase) - for tag in ind_iname_to_tag.itervalues()) - from loopy.kernel import (TemporaryVariable, find_var_base_indices_and_shape_from_inames) @@ -468,12 +454,12 @@ def realize_cse(kernel, cse_tag, dtype, independent_inames=[], dtype=np.dtype(dtype), base_indices=target_var_base_indices, shape=target_var_shape, - is_local=target_var_is_local) + is_local=None) # }}} compute_insn = make_compute_insn( - kernel, lead_csed, target_var_name, target_var_is_local, + kernel, lead_csed, target_var_name, independent_inames, new_inames, ind_iname_to_tag) # {{{ substitute variable references into instructions diff --git a/loopy/preprocess.py b/loopy/preprocess.py index a08c2fb3e75bb09dfc309d70e2d3aa41d01f14f0..078bb71a4a92fb3e1ec97004989c86c7b99c05ff 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -6,6 +6,37 @@ import pyopencl.characterize as cl_char +# {{{ local temporary finding + +def mark_local_temporaries(kernel): + new_temp_vars = {} + from loopy.kernel import LocalIndexTagBase + + writers = find_accessors(kernel, readers=False) + + from loopy.symbolic import DependencyMapper + dm = DependencyMapper(composite_leaves=False) + def get_deps(expr): + return set(var.name for var in dm(expr)) + + for temp_var in kernel.temporary_variables.itervalues(): + my_writers = writers[temp_var.name] + + has_local_parallel_write = False + for insn_id in my_writers: + insn = kernel.id_to_insn[insn_id] + has_local_parallel_write = has_local_parallel_write or any( + isinstance(kernel.iname_to_tag.get(iname), LocalIndexTagBase) + for iname in get_deps(insn.get_assignee_indices()) + & kernel.all_inames()) + + new_temp_vars[temp_var.name] = temp_var.copy( + is_local=has_local_parallel_write) + + return kernel.copy(temporary_variables=new_temp_vars) + +# }}} + # {{{ reduction iname duplication def duplicate_reduction_inames(kernel): @@ -176,6 +207,8 @@ def realize_reduction(kernel): def find_accessors(kernel, readers): """ + :arg readers: whether to find insns that read or that write + the variables in question. :return: a dict that maps variable names to ids of insns that write to that variable. """ @@ -619,6 +652,7 @@ def adjust_local_temp_var_storage(kernel): def preprocess_kernel(kernel): + kernel = mark_local_temporaries(kernel) kernel = duplicate_reduction_inames(kernel) kernel = realize_reduction(kernel)