From 639bcf61a7a3f612a8eedfc5ee0de4412b9d71e2 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sat, 4 May 2013 01:11:41 -0400 Subject: [PATCH] Add fetch_bounding_box flag to precompute, add_prefetch. --- loopy/__init__.py | 6 +++-- loopy/cse.py | 9 +++++--- loopy/isl_helpers.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 7a1ac8d26..354510691 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -820,7 +820,8 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames, # }}} def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, - default_tag="l.auto", rule_name=None, footprint_subscripts=None): + default_tag="l.auto", rule_name=None, footprint_subscripts=None, + fetch_bounding_box=False): """Prefetch all accesses to the variable *var_name*, with all accesses being swept through *sweep_inames*. @@ -908,7 +909,8 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, footprint_subscripts, arg) new_kernel = precompute(kernel, subst_use, sweep_inames, new_storage_axis_names=dim_arg_names, - default_tag=default_tag, dtype=arg.dtype) + default_tag=default_tag, dtype=arg.dtype, + fetch_bounding_box=fetch_bounding_box) # {{{ remove inames that were temporarily added by slice sweeps diff --git a/loopy/cse.py b/loopy/cse.py index 77594b412..666dba8d3 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -538,7 +538,7 @@ class InvocationReplacer(ExpandingIdentityMapper): def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axes=None, new_storage_axis_names=None, storage_axis_to_tag={}, - default_tag="l.auto", dtype=None): + default_tag="l.auto", dtype=None, fetch_bounding_box=False): """Precompute the expression described in the substitution rule determined by *subst_use* and store it in a temporary array. 
def boxify(cache_manager, domain, box_inames):
    """Return a bounding box of *domain* along the inames in *box_inames*.

    For every box iname, the minimum and maximum of that iname over *domain*
    are obtained from *cache_manager*, and the result is the intersection of
    the corresponding ``min <= iname <= max`` slabs. All other set dimensions
    of *domain* are temporarily treated as parameters while the bounds are
    computed and are moved back into the set dimensions afterwards.

    :arg cache_manager: object providing ``dim_min(domain, i)`` and
        ``dim_max(domain, i)`` (presumably memoized bound computations
        returning piecewise affine expressions -- confirm against the
        cache manager implementation).
    :arg domain: an isl set whose *trailing* set dimensions are exactly
        *box_inames*, in that order.
    :arg box_inames: non-empty sequence of set-dimension names of *domain*.

    :returns: a :class:`isl.BasicSet` with the set dimensions of *domain*,
        bounded only along *box_inames*.
    :raises ValueError: if *box_inames* is empty.
    :raises AssertionError: if *box_inames* are not the trailing set
        dimensions of *domain*.
    :raises RuntimeError: if the result cannot be coalesced into a single
        basic set.
    """
    if not box_inames:
        # min() below would raise ValueError with an unhelpful message.
        raise ValueError("boxify requires at least one box iname")

    var_dict = domain.get_var_dict(dim_type.set)
    box_iname_indices = [var_dict[iname][1] for iname in box_inames]
    n_nonbox_inames = min(box_iname_indices)

    # list(...) is required: on Python 3 a list never compares equal to a
    # range object, which would make this check fail unconditionally.
    assert box_iname_indices == list(range(
            n_nonbox_inames, domain.dim(dim_type.set))), (
            "box_inames must be the trailing set dimensions of domain, "
            "in order")

    n_box_inames = len(box_inames)
    n_old_parameters = domain.dim(dim_type.param)

    # Turn the leading (non-box) inames into parameters, appended after the
    # existing ones, so that only the box inames remain as set dimensions.
    domain = domain.move_dims(
            dim_type.param, n_old_parameters, dim_type.set, 0, n_nonbox_inames)

    result = domain.universe_like()
    zero = isl.Aff.zero_on_domain(result.space)

    def add_in_dims(aff):
        # Give the bound expression input dimensions for the box inames
        # (presumably so it can be compared with iname_aff below -- confirm).
        return aff.add_dims(dim_type.in_, n_box_inames)

    for i in range(n_box_inames):
        # Affine expression that is just the i-th box iname.
        iname_aff = zero.add_coefficient(dim_type.in_, i, 1)

        iname_min = add_in_dims(cache_manager.dim_min(domain, i))
        iname_max = add_in_dims(cache_manager.dim_max(domain, i))

        # Slab: iname_min <= iname <= iname_max
        iname_slab = (iname_min.le_set(iname_aff)
                .intersect(iname_max.ge_set(iname_aff))).coalesce()

        result = result & iname_slab

    for i, iname in enumerate(box_inames):
        result = result.set_dim_name(dim_type.set, i, iname)

    # Move the temporarily parametrized inames back to the front of the
    # set dimensions, restoring the original dimension layout.
    result = result.move_dims(
            dim_type.set, 0, dim_type.param, n_old_parameters, n_nonbox_inames)

    if isinstance(result, isl.BasicSet):
        return result

    if len(result.get_basic_sets()) > 1:
        result = result.coalesce()

    res_bsets = result.get_basic_sets()
    if len(res_bsets) == 1:
        result, = res_bsets
        return result
    else:
        raise RuntimeError("boxify did not yield convex set")