# Patch overview (descriptive header only; it precedes the first "diff --git"
# line and is not part of any hunk).
#
# Purpose: add a `loop_bound_expression_rewriters` attribute to LoopKernel and
# apply those rewriters to generated loop-bound / bounds-check expressions.
#
# Per-file summary of what the hunks below show:
#   loopy/codegen/bounds.py
#       Adds rewrite_loop_bound_expression(kernel, expr): passes `expr`
#       through each callable in kernel.loop_bound_expression_rewriters, in
#       order, and returns the final expression.
#   loopy/codegen/control.py
#       Merges the two `from __future__` imports into one and moves
#       `import six` below the license docstring. In build_loop_nest, each
#       constraint_to_cond_expr(cns) built from `bounds_checks` is now wrapped
#       in rewrite_loop_bound_expression; the `pred_checks` entries are left
#       as they were.
#   loopy/codegen/loop.py
#       In generate_sequential_loop_dim_code, pw_aff_to_expr(lbound) passed to
#       emit_initializer, and both pw_aff_to_expr(lbound) and
#       pw_aff_to_expr(ubound) passed to emit_sequential_loop, are routed
#       through rewrite_loop_bound_expression.
#   loopy/kernel/__init__.py
#       Documents the new attribute, adds the constructor keyword
#       loop_bound_expression_rewriters=() and forwards it to the base-class
#       constructor, which is now invoked via super(LoopKernel, self).
#   loopy/transform/ilp.py
#       add_axes_to_temporaries_for_ilp_and_vec returns a kernel copy whose
#       rewriter tuple has `eiii` appended (`eiii` is defined outside the
#       visible hunk).
#   loopy/version.py
#       DATA_MODEL_VERSION prefix changes from "v53" to "v54".
#   test/test_loopy.py
#       Adds test_ilp_modified_var_in_loop_bound (references loopy issue 77):
#       generates device code for an ILP-split kernel and asserts that
#       'length' only appears in the form "length[".
#
# NOTE(review): hunk headers such as "@@ -27,6 +27,13 @@" declare multi-line
#   hunks, but each hunk body below sits on a single physical line, so line
#   breaks and indentation were presumably lost in extraction. This text is
#   unlikely to apply with `git apply` / `patch` as-is -- confirm against the
#   original commit.
# NOTE(review): in the new test, `rowstart` and `rowend` are both assigned
#   rowstarts[i], so `length` is always 0. Possibly `rowstarts[i+1]` was
#   intended for `rowend`; the test only inspects generated code text, so
#   this does not affect its assertion -- confirm intent.
diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 7cc381f11d1239cba5656a9dc7a04cddaa14a368..04599d119e1b193d7ffb9c33687c1c536c2d162e 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -27,6 +27,13 @@ import islpy as isl from islpy import dim_type +def rewrite_loop_bound_expression(kernel, expr): + for rewriter in kernel.loop_bound_expression_rewriters: + expr = rewriter(expr) + + return expr + + # {{{ approximate, convex bounds check generator def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domain): diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index d206faad5bd84e3a1c7e7c061673f3d5d1144c84..0128f1786af0e728babcd3e638369f15d28d0012 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -1,8 +1,6 @@ """Loop nest build top-level control/hoisting.""" -from __future__ import division -from __future__ import absolute_import -import six +from __future__ import division, absolute_import __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -27,6 +25,8 @@ THE SOFTWARE. 
""" +import six + from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl from loopy.schedule import ( @@ -464,12 +464,15 @@ def build_loop_nest(codegen_state, schedule_index): if bounds_checks or pred_checks: from loopy.symbolic import constraint_to_cond_expr + from loopy.codegen.bounds import rewrite_loop_bound_expression prev_gen_code = gen_code def gen_code(inner_codegen_state): condition_exprs = [ - constraint_to_cond_expr(cns) + rewrite_loop_bound_expression( + kernel, + constraint_to_cond_expr(cns)) for cns in bounds_checks] + [ pred_chk for pred_chk in pred_checks] diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index ad80475c1d27f67b3df8a885f60dd96ff28efe6a..516d8385854c1e93cbc9a23ba4403b17926be193 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -349,7 +349,8 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): slabs = get_slab_decomposition(kernel, loop_iname) - from loopy.codegen.bounds import get_usable_inames_for_conditional + from loopy.codegen.bounds import ( + get_usable_inames_for_conditional, rewrite_loop_bound_expression) # Note: this does not include loop_iname itself! 
usable_inames = get_usable_inames_for_conditional(kernel, sched_index) @@ -451,7 +452,11 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): astb.emit_initializer( codegen_state, kernel.index_dtype, loop_iname, - ecm(pw_aff_to_expr(lbound), PREC_NONE, "i"), + ecm( + rewrite_loop_bound_expression( + kernel, + pw_aff_to_expr(lbound)), + PREC_NONE, "i"), is_const=True), astb.emit_blank_line(), inner, @@ -469,7 +474,11 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): codegen_state, astb.emit_sequential_loop( codegen_state, loop_iname, kernel.index_dtype, - pw_aff_to_expr(lbound), pw_aff_to_expr(ubound), inner_ast))) + rewrite_loop_bound_expression(kernel, + pw_aff_to_expr(lbound)), + rewrite_loop_bound_expression(kernel, + pw_aff_to_expr(ubound)), + inner_ast))) return merge_codegen_results(codegen_state, result) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 5dff5e53c04521bcd2f53cb2fc971ec12227149c..98a5feb8356cc0743fc43d30265a3013e6357748 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -147,6 +147,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: silenced_warnings + .. attribute:: loop_bound_expression_rewriters + + A tuple of expression mappings that need to be applied to loop bound + expressions once generated. This is necessary, for example, to + capture ILP-based rewritings of data-dependent loop bounds. + .. 
attribute:: applied_iname_rewrites A list of past substitution dictionaries that @@ -189,6 +195,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): silenced_warnings=[], applied_iname_rewrites=[], + loop_bound_expression_rewriters=(), cache_manager=None, index_dtype=np.int32, options=None, @@ -279,7 +286,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT - ImmutableRecordWithoutPickling.__init__(self, + super(LoopKernel, self).__init__( domains=domains, instructions=instructions, args=args, @@ -297,6 +304,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): substitutions=substitutions, cache_manager=cache_manager, applied_iname_rewrites=applied_iname_rewrites, + loop_bound_expression_rewriters=loop_bound_expression_rewriters, function_manglers=function_manglers, symbol_manglers=symbol_manglers, index_dtype=index_dtype, diff --git a/loopy/transform/ilp.py b/loopy/transform/ilp.py index 77840753258fa545aa01ef3e8c58cbc36e66ed72..df8240ea3ea9ae9ddb72597af73cb4819cc20a2b 100644 --- a/loopy/transform/ilp.py +++ b/loopy/transform/ilp.py @@ -170,7 +170,10 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): return kernel.copy( temporary_variables=new_temp_vars, - instructions=new_insns) + instructions=new_insns, + loop_bound_expression_rewriters=( + kernel.loop_bound_expression_rewriters + + (eiii,))) # }}} diff --git a/loopy/version.py b/loopy/version.py index 6a02f4d99b8ae22a3aa86082ae0becb30b7b6448..fd7c66fa249333ecae66c10d0ce919f806d9a884 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v53-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v54-islpy%s" % _islpy_version diff --git a/test/test_loopy.py b/test/test_loopy.py index db4a382046cc1aaf1465e81cf493415ace57e64d..22b59c0722e6b3958bcf0be6be72b4ee20aa1593 100644 
--- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1995,6 +1995,31 @@ def test_integer_reduction(ctx_factory): assert function(out) +def test_ilp_modified_var_in_loop_bound(ctx_factory): + # https://github.com/inducer/loopy/issues/77 + + knl = lp.make_kernel([ + "{ [i] : 0 <= i < m }", + "{ [j] : 0 <= j < length }"], + """ + for i + <> rowstart = rowstarts[i] + <> rowend = rowstarts[i] + <> length = rowend - rowstart + y[i] = sum(j, values[rowstart+j] * x[colindices[rowstart + j]]) + end + """) + + knl = lp.add_and_infer_dtypes( + knl, { + 'values,x': np.float64, + 'rowstarts,colindices': knl.index_dtype}) + knl = lp.split_iname(knl, 'i', 2, inner_tag='ilp') + code = lp.generate_code_v2(knl).device_code() + print(code.replace("length[", "LENGTH[")) + assert 'length' not in code.replace("length[", "LENGTH[") + + def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop) watch_for_barrier = False