diff --git a/loopy/__init__.py b/loopy/__init__.py
index 7a1ac8d264b909a1f2da5362d8c0633171747f38..3545106917b230070e1ef91f9ae6db54059f9ee7 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -820,7 +820,8 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames,
 # }}}
 
 def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
-        default_tag="l.auto", rule_name=None, footprint_subscripts=None):
+        default_tag="l.auto", rule_name=None, footprint_subscripts=None,
+        fetch_bounding_box=False):
     """Prefetch all accesses to the variable *var_name*, with all accesses
     being swept through *sweep_inames*.
 
@@ -908,7 +909,8 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
                     footprint_subscripts, arg)
     new_kernel = precompute(kernel, subst_use, sweep_inames,
             new_storage_axis_names=dim_arg_names,
-            default_tag=default_tag, dtype=arg.dtype)
+            default_tag=default_tag, dtype=arg.dtype,
+            fetch_bounding_box=fetch_bounding_box)
 
     # {{{ remove inames that were temporarily added by slice sweeps
 
diff --git a/loopy/cse.py b/loopy/cse.py
index 77594b4124ef56984db87c78abc287ab3a598f87..666dba8d339ca234e84bbe87aae5968b595440b5 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -538,7 +538,7 @@ class InvocationReplacer(ExpandingIdentityMapper):
 
 def precompute(kernel, subst_use, sweep_inames=[], within=None,
         storage_axes=None, new_storage_axis_names=None, storage_axis_to_tag={},
-        default_tag="l.auto", dtype=None):
+        default_tag="l.auto", dtype=None, fetch_bounding_box=False):
     """Precompute the expression described in the substitution rule determined by
     *subst_use* and store it in a temporary array. A precomputation needs two
     things to operate, a list of *sweep_inames* (order irrelevant) and an
@@ -807,8 +807,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
             storage_axis_names, storage_axis_sources,
             sweep_inames, invocation_descriptors)
 
-    from loopy.isl_helpers import convexify
-    new_domain = convexify(new_domain)
+    from loopy.isl_helpers import convexify, boxify
+    if fetch_bounding_box:
+        new_domain = boxify(kernel.cache_manager, new_domain, storage_axis_names)
+    else:
+        new_domain = convexify(new_domain)
 
     for saxis in storage_axis_names:
         if saxis not in non1_storage_axis_names:
diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py
index df8dc9aa5cc734bd0dd95763f6df01f842cdf2fd..fea91d7f9e20ca83f0b752a739c44297e6253689 100644
--- a/loopy/isl_helpers.py
+++ b/loopy/isl_helpers.py
@@ -350,5 +350,87 @@ def convexify(domain):
 
 
 
+def boxify(cache_manager, domain, box_inames):
+    """Return the bounding box of *domain* along the inames *box_inames*.
+
+    Each iname in *box_inames* is bounded independently by the minimum and
+    maximum reported for it by *cache_manager*, and the result is the
+    intersection of these per-iname slabs. The remaining set dimensions of
+    *domain* are temporarily turned into parameters while the bounds are
+    computed and are moved back to the front of the set dimensions afterwards.
+
+    :arg cache_manager: object providing ``dim_min(domain, i)`` and
+        ``dim_max(domain, i)``.
+    :arg domain: the set to be boxed. *box_inames* must occupy its trailing
+        set dimensions, in the order given.
+    :arg box_inames: names of the set dimensions of *domain* to be boxed.
+    :returns: a single basic set (:class:`isl.BasicSet`, or the sole
+        element of ``get_basic_sets()``).
+    :raises ValueError: if *box_inames* is empty.
+    :raises RuntimeError: if the result does not coalesce into a single
+        basic set.
+    """
+
+    var_dict = domain.get_var_dict(dim_type.set)
+    box_iname_indices = [var_dict[iname][1] for iname in box_inames]
+
+    if not box_iname_indices:
+        raise ValueError("boxify requires at least one iname in box_inames")
+
+    n_nonbox_inames = min(box_iname_indices)
+
+    # Compare against a list: a lazy range() object never equals a list.
+    assert box_iname_indices == list(range(
+            n_nonbox_inames, domain.dim(dim_type.set)))
+
+    # Turn the non-box inames into parameters, presumably so that the bounds
+    # computed below may be expressed in terms of them.
+    n_old_parameters = domain.dim(dim_type.param)
+    domain = domain.move_dims(
+            dim_type.param, n_old_parameters, dim_type.set, 0, n_nonbox_inames)
+
+    result = domain.universe_like()
+    zero = isl.Aff.zero_on_domain(result.space)
+
+    def add_in_dims(aff):
+        # Append len(box_inames) input dimensions to *aff*, presumably so
+        # that it lives in the same space as *iname_aff* below.
+        return aff.add_dims(dim_type.in_, len(box_inames))
+
+    for i in range(len(box_iname_indices)):
+        iname_aff = zero.add_coefficient(dim_type.in_, i, 1)
+
+        iname_min = add_in_dims(cache_manager.dim_min(domain, i))
+        iname_max = add_in_dims(cache_manager.dim_max(domain, i))
+
+        iname_slab = (iname_min.le_set(iname_aff)
+                .intersect(iname_max.ge_set(iname_aff))).coalesce()
+
+        result = result & iname_slab
+
+    for i, iname in enumerate(box_inames):
+        result = result.set_dim_name(dim_type.set, i, iname)
+
+    # Undo the set -> parameter move from above.
+    result = result.move_dims(
+            dim_type.set, 0, dim_type.param, n_old_parameters, n_nonbox_inames)
+
+    if isinstance(result, isl.BasicSet):
+        return result
+
+    if len(result.get_basic_sets()) > 1:
+        result = result.coalesce()
+
+    res_bsets = result.get_basic_sets()
+    if len(res_bsets) != 1:
+        raise RuntimeError("boxify did not yield convex set")
+
+    return res_bsets[0]
+
+
+
+
+
+
 
 # vim: foldmethod=marker