diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py
index 4bd7d7f45a3f28de6cc65118ac9af1f0f7aeb8cb..bbd6ab8bcc7830763f6485db1af7316b5d61820f 100644
--- a/loopy/codegen/bounds.py
+++ b/loopy/codegen/bounds.py
@@ -124,16 +124,33 @@ def generate_bounds_checks(domain, check_inames, implemented_domain):
     return filter_necessary_constraints(
             implemented_domain, domain_bset.get_constraints())
 
-def generate_bounds_checks_code(ccm, domain, check_inames, implemented_domain):
-    return [constraint_to_code(ccm, cns) for cns in
-            generate_bounds_checks(domain, check_inames, implemented_domain)]
-
-def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt):
-    from loopy.codegen import wrap_in_if
-    return wrap_in_if(
-            generate_bounds_checks_code(ccm, domain, check_inames,
-                implemented_domain),
-            stmt)
+def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt):
+    """Wrap *stmt* in the conditions returned by generate_bounds_checks().
+
+    Returns a tuple ``(stmt, new_implemented_domain)``. *stmt* is wrapped in
+    a cgen ``If`` on the ``&&``-joined conditions, or passed through
+    unchanged when there are none. *new_implemented_domain* is
+    *implemented_domain* intersected with those same constraints.
+    """
+    # Compute the constraints once and reuse them for both the narrowed
+    # domain and the emitted condition.
+    # NOTE(review): assumes add_constraints() leaves the constraint objects
+    # usable afterwards -- confirm against islpy.
+    bounds_checks = list(generate_bounds_checks(
+            domain, check_inames,
+            implemented_domain))
+
+    new_implemented_domain = implemented_domain & (
+            isl.Set.universe(domain.get_space()).add_constraints(bounds_checks))
+
+    condition_codelets = [
+            constraint_to_code(ccm, cns) for cns in bounds_checks]
+
+    if condition_codelets:
+        from cgen import If
+        stmt = If("\n&& ".join(condition_codelets), stmt)
+
+    return stmt, new_implemented_domain
 
 def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
     # FIXME add admissible vars
diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index 60be2e1dcb607d04481b5253fc7407b9329d7461..fd50850c91215133a517def805765eb6f3698f8f 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -77,13 +77,13 @@ def generate_instruction_code(kernel, insn, codegen_state):
         from cgen import Assign
         insn_code = Assign(ccm(insn.assignee), ccm(insn.expression))
         from loopy.codegen.bounds import wrap_in_bounds_checks
-        insn_code = wrap_in_bounds_checks(
+        insn_code, impl_domain = wrap_in_bounds_checks(
                 ccm, kernel.domain,
                 insn.all_inames(), ilpi.implemented_domain, insn_code)
 
         result.append(GeneratedInstruction(
             insn_id=insn.id,
-            implemented_domain=ilpi.implemented_domain,
+            implemented_domain=impl_domain,
             ast=insn_code))
 
     from loopy.codegen import gen_code_block