diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 51c0dca36b4159670e0b04ab7cd02f7e5275dcd5..17641ec019e0eccaec74951070ddb401cf3d11f1 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -23,14 +23,10 @@ THE SOFTWARE. """ - - import islpy as isl from islpy import dim_type - - def constraint_to_code(ccm, cns): if cns.is_equality(): comp_op = "==" @@ -40,6 +36,7 @@ def constraint_to_code(ccm, cns): from loopy.symbolic import constraint_to_expr return "%s %s 0" % (ccm(constraint_to_expr(cns), 'i'), comp_op) + # {{{ bounds check generator def get_bounds_checks(domain, check_inames, implemented_domain, @@ -71,6 +68,7 @@ def get_bounds_checks(domain, check_inames, implemented_domain, # }}} + # {{{ on which inames may a conditional depend? def get_usable_inames_for_conditional(kernel, sched_index): @@ -102,7 +100,4 @@ def get_usable_inames_for_conditional(kernel, sched_index): # }}} - - - # vim: foldmethod=marker diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index fa42edf88ab6c42681a3dba494b6d15ed1cfe3e6..57257fe1ba50596724267f4825da6e1efea9a860 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -25,16 +25,10 @@ THE SOFTWARE. """ - - - - from loopy.codegen import CodeGenerationState, gen_code_block import islpy as isl - - def get_admissible_conditional_inames_for(kernel, sched_index): """This function disallows conditionals on local-idx tagged inames if there is a barrier nested somewhere within. @@ -55,8 +49,6 @@ def get_admissible_conditional_inames_for(kernel, sched_index): return result - - def generate_code_for_sched_index(kernel, sched_index, codegen_state): from loopy.schedule import (EnterLoop, RunInstruction, Barrier) @@ -76,8 +68,8 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state): elif tag is None or isinstance(tag, (LoopedIlpTag, ForceSequentialTag)): func = generate_sequential_loop_dim_code else: - raise RuntimeError("encountered (invalid) EnterLoop for '%s', tagged '%s'" - % (sched_item.iname, tag)) + raise RuntimeError("encountered (invalid) EnterLoop " + "for '%s', tagged '%s'" % (sched_item.iname, tag)) return func(kernel, sched_index, codegen_state) @@ -105,8 +97,6 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state): % type(sched_item)) - - def remove_inames_for_shared_hw_axes(kernel, cond_inames): """ See if cond_inames contains references to two (or more) inames that @@ -138,8 +128,6 @@ def remove_inames_for_shared_hw_axes(kernel, cond_inames): return frozenset(cond_inames - multi_use_inames) - - def build_loop_nest(kernel, sched_index, codegen_state): # Most of the complexity of this function goes towards finding groups of # instructions that can be nested inside a shared conditional. @@ -180,8 +168,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): # {{{ pass 2: find admissible conditional inames for each sibling schedule item admissible_cond_inames = [ - get_admissible_conditional_inames_for(kernel, sched_index) - for sched_index in my_sched_indices] + get_admissible_conditional_inames_for(kernel, i) + for i in my_sched_indices] # }}} @@ -210,7 +198,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): # so we can safely overapproximate here. overapproximate=True) - def build_insn_group(sched_indices_and_cond_inames, codegen_state, done_group_lengths=set()): + def build_insn_group(sched_indices_and_cond_inames, codegen_state, + done_group_lengths=set()): # done_group_lengths serves to prevent infinite recursion by imposing a # bigger and bigger minimum size on the group of shared inames found. @@ -224,7 +213,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): # Keep growing schedule item group as long as group fulfills minimum # size requirement. - bounds_check_cache = BoundsCheckCache(kernel, codegen_state.implemented_domain) + bounds_check_cache = BoundsCheckCache( + kernel, codegen_state.implemented_domain) current_iname_set = cond_inames @@ -236,7 +226,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): candidate_group_length += 1 continue - other_sched_index, other_cond_inames = sched_indices_and_cond_inames[candidate_group_length-1] + other_sched_index, other_cond_inames = \ + sched_indices_and_cond_inames[candidate_group_length-1] current_iname_set = current_iname_set & other_cond_inames # {{{ see which inames are actually used in group @@ -244,7 +235,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): # And only generate conditionals for those. from loopy.schedule import find_used_inames_within used_inames = set() - for subsched_index, _ in sched_indices_and_cond_inames[0:candidate_group_length]: + for subsched_index, _ in \ + sched_indices_and_cond_inames[0:candidate_group_length]: used_inames |= find_used_inames_within(kernel, subsched_index) # }}} @@ -255,7 +247,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): bounds_checks = bounds_check_cache(only_unshared_inames) if bounds_checks or candidate_group_length == 1: - # length-1 must always be an option to reach the recursion base case below + # length-1 must always be an option to reach the recursion base + # case below found_hoists.append((candidate_group_length, bounds_checks)) candidate_group_length += 1 @@ -283,7 +276,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): if group_length == 1: # group only contains starting schedule item - result = [generate_code_for_sched_index(kernel, sched_index, new_codegen_state)] + result = [generate_code_for_sched_index( + kernel, sched_index, new_codegen_state)] else: # recurse with a bigger done_group_lengths result = build_insn_group( @@ -295,7 +289,8 @@ def build_loop_nest(kernel, sched_index, codegen_state): from loopy.codegen import wrap_in_if from loopy.codegen.bounds import constraint_to_code result = [wrap_in_if( - [constraint_to_code(codegen_state.c_code_mapper, cns) for cns in bounds_checks], + [constraint_to_code(codegen_state.c_code_mapper, cns) + for cns in bounds_checks], gen_code_block(result))] return result + build_insn_group( diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 0858a67d933c63e05582a02efea9741e5c792f1e..ab603aa6f6fd334a851ff3a58bb29af52b2ccafb 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -24,20 +24,17 @@ THE SOFTWARE. """ - - import islpy as isl - - def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt): from loopy.codegen.bounds import get_bounds_checks, constraint_to_code bounds_checks = get_bounds_checks( domain, check_inames, implemented_domain, overapproximate=False) - bounds_check_set = isl.Set.universe(domain.get_space()).add_constraints(bounds_checks) + bounds_check_set = isl.Set.universe(domain.get_space()) \ + .add_constraints(bounds_checks) bounds_check_set, new_implemented_domain = isl.align_two( bounds_check_set, implemented_domain) new_implemented_domain = new_implemented_domain & bounds_check_set @@ -51,8 +48,6 @@ def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt): return stmt, new_implemented_domain - - def generate_instruction_code(kernel, insn, codegen_state): from loopy.codegen import GeneratedInstruction @@ -91,7 +86,9 @@ def generate_instruction_code(kernel, insn, codegen_state): ast=S(r'printf("write %s[%s]\n", %s);' % (insn.get_assignee_var_name(), ",".join(len(idx) * ["%d"]), - ",".join(ccm(i, prec=None, type_context="i") for i in idx))), + ",".join( + ccm(i, prec=None, type_context="i") + for i in idx))), implemented_domain=None), result ]) @@ -99,7 +96,4 @@ def generate_instruction_code(kernel, insn, codegen_state): return result - - - # vim: foldmethod=marker diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 41118cbf173c2ede733da51a1960f4676bd31e16..a20ec818bd18e83719c5ea7b7f26fd6ff41f4956 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -23,17 +23,12 @@ THE SOFTWARE. """ - - from loopy.codegen import gen_code_block import islpy as isl from islpy import dim_type from loopy.codegen.control import build_loop_nest - - - # {{{ conditional-minimizing slab decomposition def get_slab_decomposition(kernel, iname, sched_index, codegen_state): @@ -115,6 +110,7 @@ def get_slab_decomposition(kernel, iname, sched_index, codegen_state): # }}} + # {{{ unrolled loops def generate_unroll_loop(kernel, sched_index, codegen_state): @@ -144,6 +140,7 @@ def generate_unroll_loop(kernel, sched_index, codegen_state): # }}} + def intersect_kernel_with_slab(kernel, slab, iname): hdi = kernel.get_home_domain_index(iname) home_domain = kernel.domains[hdi] @@ -156,8 +153,10 @@ def intersect_kernel_with_slab(kernel, slab, iname): # {{{ hw-parallel loop -def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=None): - from loopy.kernel.data import UniqueTag, HardwareParallelTag, LocalIndexTag, GroupIndexTag +def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, + hw_inames_left=None): + from loopy.kernel.data import ( + UniqueTag, HardwareParallelTag, LocalIndexTag, GroupIndexTag) if hw_inames_left is None: hw_inames_left = [iname @@ -235,6 +234,7 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left= # }}} + # {{{ sequential loop def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):