From 5277455df1a2d0d15a789f3deb277422c1a1e72c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sat, 28 Feb 2015 13:51:12 -0600 Subject: [PATCH] Improve handling of no storage indices in precompute --- loopy/precompute.py | 78 ++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/loopy/precompute.py b/loopy/precompute.py index 2bde8022c..af34c57ac 100644 --- a/loopy/precompute.py +++ b/loopy/precompute.py @@ -115,9 +115,10 @@ def build_per_access_storage_to_domain_map(invdesc, domain, else: stor2sweep = stor2sweep.intersect(cns_map) - stor2sweep = stor2sweep.move_dims( - dim_type.in_, 0, - dim_type.out, rn, stor_dim) + if stor2sweep is not None: + stor2sweep = stor2sweep.move_dims( + dim_type.in_, 0, + dim_type.out, rn, stor_dim) # stor2sweep is back in map_space return stor2sweep @@ -168,7 +169,7 @@ def build_global_storage_to_sweep_map(kernel, invocation_descriptors, invdesc.is_in_footprint = True if isinstance(global_stor2sweep, isl.BasicMap): - global_stor2sweep = isl.Map.from_basic_map(stor2sweep) + global_stor2sweep = isl.Map.from_basic_map(global_stor2sweep) global_stor2sweep = global_stor2sweep.intersect_range(domain_dup_sweep) # space for global_stor2sweep: @@ -786,39 +787,56 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, expanding_inames = sweep_inames_set | frozenset(expanding_usage_arg_deps) assert expanding_inames <= kernel.all_inames() - # {{{ find domain to be changed + if storage_axis_names: + # {{{ find domain to be changed - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(kernel, expanding_inames) + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(kernel, expanding_inames) - if domch.leaf_domain_index is not None: - # If the sweep inames are at home in parent domains, then we'll add - # fetches with loops over copies of these parent inames that will end - # up being scheduled *within* loops over these parents. + if domch.leaf_domain_index is not None: + # If the sweep inames are at home in parent domains, then we'll add + # fetches with loops over copies of these parent inames that will end + # up being scheduled *within* loops over these parents. - for iname in sweep_inames_set: - if kernel.get_home_domain_index(iname) != domch.leaf_domain_index: - raise RuntimeError("sweep iname '%s' is not 'at home' in the " - "sweep's leaf domain" % iname) + for iname in sweep_inames_set: + if kernel.get_home_domain_index(iname) != domch.leaf_domain_index: + raise RuntimeError("sweep iname '%s' is not 'at home' in the " + "sweep's leaf domain" % iname) - # }}} + # }}} - (non1_storage_axis_names, new_domain, - storage_base_indices, non1_storage_base_indices, non1_storage_shape) = \ - get_access_info(kernel, domch.domain, subst_name, - storage_axis_names, storage_axis_sources, - sweep_inames, invocation_descriptors) + (non1_storage_axis_names, new_domain, + storage_base_indices, non1_storage_base_indices, non1_storage_shape) = \ + get_access_info(kernel, domch.domain, subst_name, + storage_axis_names, storage_axis_sources, + sweep_inames, invocation_descriptors) + + from loopy.isl_helpers import convexify, boxify + if fetch_bounding_box: + new_domain = boxify(kernel.cache_manager, new_domain, + non1_storage_axis_names, kernel.assumptions) + else: + new_domain = convexify(new_domain) - from loopy.isl_helpers import convexify, boxify - if fetch_bounding_box: - new_domain = boxify(kernel.cache_manager, new_domain, - non1_storage_axis_names, kernel.assumptions) + for saxis in storage_axis_names: + if saxis not in non1_storage_axis_names: + del new_iname_to_tag[saxis] + + new_kernel_domains = domch.get_domains_with(new_domain) else: - new_domain = convexify(new_domain) + # leave kernel domains unchanged + new_kernel_domains = kernel.domains + + non1_storage_axis_names = () + storage_base_indices = () + non1_storage_base_indices = () + non1_storage_shape = () - for saxis in storage_axis_names: - if saxis not in non1_storage_axis_names: - del new_iname_to_tag[saxis] + # no index dependencies--every reference to the subst rule + # is necessarily in the footprint. + + for invdesc in invocation_descriptors: + invdesc.is_in_footprint = True # {{{ set up compute insn @@ -886,7 +904,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} kernel = kernel.copy( - domains=domch.get_domains_with(new_domain), + domains=new_kernel_domains, instructions=[compute_insn] + kernel.instructions, temporary_variables=new_temporary_variables) -- GitLab