diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 3beb9a2ee78b7d374997367162b51cc50d87410b..a1c2a5acb4fe7f4bca1d1b566808599ec0b33760 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -63,8 +63,8 @@ def get_bounds_checks(domain, check_inames, implemented_domain, result = result.compute_divs() from loopy.isl_helpers import convexify - result = convexify(result).get_constraints() - return result + result = convexify(result) + return result.get_constraints() # }}} diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 57257fe1ba50596724267f4825da6e1efea9a860..013ec5d099b892799ad1d7b83dcf91daf4db9a16 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -246,7 +246,7 @@ def build_loop_nest(kernel, sched_index, codegen_state): bounds_checks = bounds_check_cache(only_unshared_inames) - if bounds_checks or candidate_group_length == 1: + if bounds_checks or bounds_checks is None or candidate_group_length == 1: # length-1 must always be an option to reach the recursion base # case below found_hoists.append((candidate_group_length, bounds_checks)) @@ -271,27 +271,35 @@ def build_loop_nest(kernel, sched_index, codegen_state): if check_set is None: new_codegen_state = codegen_state + is_empty = False else: + is_empty = check_set.is_empty() new_codegen_state = codegen_state.intersect(check_set) - if group_length == 1: - # group only contains starting schedule item - result = [generate_code_for_sched_index( - kernel, sched_index, new_codegen_state)] + if is_empty: + result = [] else: - # recurse with a bigger done_group_lengths - result = build_insn_group( - sched_indices_and_cond_inames[0:group_length], - new_codegen_state, - done_group_lengths=done_group_lengths | set([group_length])) - - if bounds_checks: - from loopy.codegen import wrap_in_if - from loopy.codegen.bounds import constraint_to_code - result = [wrap_in_if( - [constraint_to_code(codegen_state.c_code_mapper, cns) - for cns in bounds_checks], - gen_code_block(result))] + if group_length == 1: + # group only contains starting schedule item + result = [generate_code_for_sched_index( + kernel, sched_index, new_codegen_state)] + if result == [None]: + result = [] + else: + # recurse with a bigger done_group_lengths + result = build_insn_group( + sched_indices_and_cond_inames[0:group_length], + new_codegen_state, + done_group_lengths=done_group_lengths | set([group_length])) + + if bounds_checks: + from loopy.codegen import wrap_in_if + from loopy.codegen.bounds import constraint_to_code + result = [ + wrap_in_if( + [constraint_to_code(codegen_state.c_code_mapper, cns) + for cns in bounds_checks], + gen_code_block(result))] return result + build_insn_group( sched_indices_and_cond_inames[group_length:], codegen_state) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index f333baadb765df8466c9193c6ac526e3c481a795..1dcbf8906baa376a77f0b51c8bbcf8122a149367 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -40,6 +40,9 @@ def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt): bounds_check_set, implemented_domain) new_implemented_domain = new_implemented_domain & bounds_check_set + if bounds_check_set.is_empty(): + return None, None + condition_codelets = [constraint_to_code(ccm, cns) for cns in bounds_checks] if condition_codelets: @@ -61,7 +64,6 @@ def generate_instruction_code(kernel, insn, codegen_state): def generate_expr_instruction_code(kernel, insn, codegen_state): - ccm = codegen_state.c_code_mapper expr = insn.expression @@ -82,6 +84,9 @@ def generate_expr_instruction_code(kernel, insn, codegen_state): codegen_state.implemented_domain, insn_code) + if insn_code is None: + return None + result = GeneratedInstruction( insn_id=insn.id, implemented_domain=impl_domain, @@ -139,6 +144,9 @@ def generate_c_instruction_code(kernel, insn, codegen_state): codegen_state.implemented_domain, Block(body)) + if insn_code is None: + return None + return GeneratedInstruction( insn_id=insn.id, implemented_domain=impl_domain,