From bd0caec0b46bcdc73dcebdc8a77a342b6e02016a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 Feb 2020 13:50:02 -0600 Subject: [PATCH 01/56] rename {LivenessAnalysis, LoopKernel, SchedulerState}.schedule to {LivenessAnalysis, LoopKernel, SchedulerState}.program_outline --- loopy/auto_test.py | 2 +- loopy/check.py | 16 +-- loopy/codegen/__init__.py | 4 +- loopy/codegen/bounds.py | 5 +- loopy/codegen/control.py | 15 +-- loopy/codegen/loop.py | 9 +- loopy/codegen/result.py | 3 +- loopy/kernel/__init__.py | 14 +-- loopy/kernel/tools.py | 8 +- loopy/schedule/__init__.py | 168 ++++++++++++++++--------------- loopy/schedule/device_mapping.py | 66 ++++++------ loopy/schedule/tools.py | 16 +-- loopy/statistics.py | 2 +- loopy/target/c/__init__.py | 4 +- loopy/target/cuda.py | 2 +- loopy/target/execution.py | 2 +- loopy/target/opencl.py | 2 +- loopy/transform/save.py | 36 +++---- test/test_loopy.py | 2 +- 19 files changed, 193 insertions(+), 183 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6837b99a0..a2cd1ff1b 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -540,7 +540,7 @@ def auto_test_vs_ref( test_knl = lp.preprocess_kernel(test_knl) - if not test_knl.schedule: + if not test_knl.program_outline: test_kernels = lp.generate_loop_schedules(test_knl) else: test_kernels = [test_knl] diff --git a/loopy/check.py b/loopy/check.py index 0d2bbff7c..494c04302 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -691,18 +691,18 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): local_axes = set() i = 0 - loop_end_i = past_end_i = len(kernel.schedule) + loop_end_i = past_end_i = len(kernel.program_outline) else: - assert isinstance(kernel.schedule[sched_index], CallKernel) - _, past_end_i = gather_schedule_block(kernel.schedule, sched_index) + assert isinstance(kernel.program_outline[sched_index], CallKernel) + _, past_end_i = gather_schedule_block(kernel.program_outline, sched_index) group_size, local_size = 
kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at(kernel.schedule, sched_index)) + get_insn_ids_for_block_at(kernel.program_outline, sched_index)) group_axes = set(ax for ax, length in enumerate(group_size)) local_axes = set(ax for ax, length in enumerate(local_size)) i = sched_index + 1 - assert isinstance(kernel.schedule[past_end_i - 1], ReturnFromKernel) + assert isinstance(kernel.program_outline[past_end_i - 1], ReturnFromKernel) loop_end_i = past_end_i - 1 # alternative: just disregard length-1 dimensions? @@ -711,7 +711,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): GroupIndexTag) while i < loop_end_i: - sched_item = kernel.schedule[i] + sched_item = kernel.program_outline[i] if isinstance(sched_item, CallKernel): i = _check_for_unused_hw_axes_in_kernel_chunk(kernel, i) @@ -765,7 +765,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): def check_for_unused_hw_axes_in_insns(kernel): - if kernel.schedule: + if kernel.program_outline: _check_for_unused_hw_axes_in_kernel_chunk(kernel) # }}} @@ -858,7 +858,7 @@ def check_that_all_insns_are_scheduled(kernel): from loopy.schedule import sched_item_to_insn_id scheduled_insns = set( insn_id - for sched_item in kernel.schedule + for sched_item in kernel.program_outline for insn_id in sched_item_to_insn_id(sched_item)) assert scheduled_insns <= all_schedulable_insns diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 11f874e1b..70feac54f 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -384,7 +384,7 @@ def generate_code_v2(kernel): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) - if kernel.schedule is None: + if kernel.program_outline is None: from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) @@ -470,7 +470,7 @@ def generate_code_v2(kernel): kernel.target.host_program_name_prefix + kernel.name + 
kernel.target.host_program_name_suffix), - schedule_index_end=len(kernel.schedule)) + schedule_index_end=len(kernel.program_outline)) from loopy.codegen.result import generate_host_or_device_program codegen_result = generate_host_or_device_program( diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index b736191ec..3b44cf5d9 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -68,7 +68,8 @@ def get_usable_inames_for_conditional(kernel, sched_index): # Find our containing subkernel. Grab inames for all insns from there. within_subkernel = False - for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index]): + for sched_item_index, sched_item in enumerate( + kernel.program_outline[:sched_index]): from loopy.schedule import CallKernel, ReturnFromKernel if isinstance(sched_item, CallKernel): within_subkernel = True @@ -81,7 +82,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): return frozenset(result) insn_ids_for_subkernel = get_insn_ids_for_block_at( - kernel.schedule, subkernel_index) + kernel.program_outline, subkernel_index) inames_for_subkernel = ( iname diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e9de52eb6..6b381850b 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -36,7 +36,7 @@ def synthesize_idis_for_extra_args(kernel, schedule_index): """ :returns: A list of :class:`loopy.codegen.ImplementedDataInfo` """ - sched_item = kernel.schedule[schedule_index] + sched_item = kernel.program_outline[schedule_index] from loopy.codegen import ImplementedDataInfo from loopy.kernel.data import InameArg, AddressSpace @@ -67,13 +67,13 @@ def synthesize_idis_for_extra_args(kernel, schedule_index): def generate_code_for_sched_index(codegen_state, sched_index): kernel = codegen_state.kernel - sched_item = kernel.schedule[sched_index] + sched_item = kernel.program_outline[sched_index] if isinstance(sched_item, CallKernel): assert not 
codegen_state.is_generating_device_code from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at) - _, past_end_i = gather_schedule_block(kernel.schedule, sched_index) + _, past_end_i = gather_schedule_block(kernel.program_outline, sched_index) assert past_end_i <= codegen_state.schedule_index_end extra_args = synthesize_idis_for_extra_args(kernel, sched_index) @@ -90,7 +90,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): new_codegen_state, sched_index) glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at(kernel.schedule, sched_index)) + get_insn_ids_for_block_at(kernel.program_outline, sched_index)) return merge_codegen_results(codegen_state, [ codegen_result, @@ -177,7 +177,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): def get_required_predicates(kernel, sched_index): result = None - for _, sched_item in generate_sub_sched_items(kernel.schedule, sched_index): + for _, sched_item in generate_sub_sched_items( + kernel.program_outline, sched_index): if isinstance(sched_item, Barrier): my_preds = frozenset() elif isinstance(sched_item, RunInstruction): @@ -239,7 +240,7 @@ def build_loop_nest(codegen_state, schedule_index): i = schedule_index while i < codegen_state.schedule_index_end: - sched_item = kernel.schedule[i] + sched_item = kernel.program_outline[i] if isinstance(sched_item, LeaveLoop): break @@ -247,7 +248,7 @@ def build_loop_nest(codegen_state, schedule_index): my_sched_indices.append(i) if isinstance(sched_item, (EnterLoop, CallKernel)): - _, i = gather_schedule_block(kernel.schedule, i) + _, i = gather_schedule_block(kernel.program_outline, i) assert i <= codegen_state.schedule_index_end, \ "schedule block extends beyond schedule_index_end" diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index b3a877988..3cfb5110b 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -119,7 +119,7 @@ def get_slab_decomposition(kernel, iname): def 
generate_unroll_loop(codegen_state, sched_index): kernel = codegen_state.kernel - iname = kernel.schedule[sched_index].iname + iname = kernel.program_outline[sched_index].iname bounds = kernel.get_iname_bounds(iname, constants_only=True) @@ -161,7 +161,7 @@ def generate_unroll_loop(codegen_state, sched_index): def generate_vectorize_loop(codegen_state, sched_index): kernel = codegen_state.kernel - iname = kernel.schedule[sched_index].iname + iname = kernel.program_outline[sched_index].iname bounds = kernel.get_iname_bounds(iname, constants_only=True) @@ -234,7 +234,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, LocalIndexTag, GroupIndexTag, VectorizeTag) from loopy.schedule import get_insn_ids_for_block_at - insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) + insn_ids_for_block = get_insn_ids_for_block_at( + kernel.program_outline, schedule_index) if hw_inames_left is None: all_inames_by_insns = set() @@ -346,7 +347,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper - loop_iname = kernel.schedule[sched_index].iname + loop_iname = kernel.program_outline[sched_index].iname slabs = get_slab_decomposition(kernel, loop_iname) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 4318ad71c..d0ae18b59 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -282,7 +282,8 @@ def generate_host_or_device_program(codegen_state, schedule_index): from loopy.codegen.control import build_loop_nest if codegen_state.is_generating_device_code: from loopy.schedule import CallKernel - assert isinstance(codegen_state.kernel.schedule[schedule_index], CallKernel) + assert isinstance( + codegen_state.kernel.program_outline[schedule_index], CallKernel) from loopy.codegen.loop import set_up_hw_parallel_loops codegen_result = set_up_hw_parallel_loops( diff --git a/loopy/kernel/__init__.py 
b/loopy/kernel/__init__.py index 9096edcc0..cd14cf88b 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -158,7 +158,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): A list of :class:`loopy.KernelArgument` - .. attribute:: schedule + .. attribute:: program_outline *None* or a list of :class:`loopy.schedule.ScheduleItem` @@ -227,7 +227,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ constructor - def __init__(self, domains, instructions, args=None, schedule=None, + def __init__(self, domains, instructions, args=None, program_outline=None, name="loopy_kernel", preambles=None, preamble_generators=None, @@ -351,7 +351,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): domains=domains, instructions=instructions, args=args, - schedule=schedule, + program_outline=program_outline, name=name, preambles=preambles, preamble_generators=preamble_generators, @@ -1239,7 +1239,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "rules", "instructions", "Dependencies", - "schedule", + "program_outline", ]) first_letter_to_what = dict( @@ -1341,12 +1341,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): "(use loopy.show_dependency_graph to visualize)") lines.extend(dep_lines) - if "schedule" in what and kernel.schedule is not None: + if "program_outline" in what and kernel.program_outline is not None: lines.extend(sep) if show_labels: lines.append("SCHEDULE:") from loopy.schedule import dump_schedule - lines.append(dump_schedule(kernel, kernel.schedule)) + lines.append(dump_schedule(kernel, kernel.program_outline)) lines.extend(sep) @@ -1473,7 +1473,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "domains", "instructions", "args", - "schedule", + "program_outline", "name", "preambles", "assumptions", diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index bb6ae44c9..282a4348b 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -463,7 +463,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, 
use_insn_id=False): from loopy.kernel.creation import apply_single_writer_depencency_heuristic kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=False) - if iname_cluster and not kernel.schedule: + if iname_cluster and not kernel.program_outline: try: from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) @@ -540,7 +540,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, ReturnFromKernel) - for sched_item in kernel.schedule: + for sched_item in kernel.program_outline: if isinstance(sched_item, EnterLoop): lines.append("subgraph cluster_%s { label=\"%s\"" % (sched_item.iname, sched_item.iname)) @@ -1732,7 +1732,7 @@ def get_subkernels(kernel): from loopy.schedule import CallKernel return tuple(sched_item.kernel_name - for sched_item in kernel.schedule + for sched_item in kernel.program_outline if isinstance(sched_item, CallKernel)) @@ -1752,7 +1752,7 @@ def get_subkernel_to_insn_id_map(kernel): subkernel = None result = {} - for sched_item in kernel.schedule: + for sched_item in kernel.program_outline: if isinstance(sched_item, CallKernel): subkernel = sched_item.kernel_name result[subkernel] = set() diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index f145c7122..8a29d1825 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -105,33 +105,33 @@ class Barrier(ScheduleItem): # {{{ schedule utilities -def gather_schedule_block(schedule, start_idx): - assert isinstance(schedule[start_idx], BeginBlockItem) +def gather_schedule_block(program_outline, start_idx): + assert isinstance(program_outline[start_idx], BeginBlockItem) level = 0 i = start_idx - while i < len(schedule): - if isinstance(schedule[i], BeginBlockItem): + while i < len(program_outline): + if isinstance(program_outline[i], BeginBlockItem): level += 1 - elif isinstance(schedule[i], EndBlockItem): + elif 
isinstance(program_outline[i], EndBlockItem): level -= 1 if level == 0: - return schedule[start_idx:i+1], i+1 + return program_outline[start_idx:i+1], i+1 i += 1 assert False -def generate_sub_sched_items(schedule, start_idx): - if not isinstance(schedule[start_idx], BeginBlockItem): - yield start_idx, schedule[start_idx] +def generate_sub_sched_items(program_outline, start_idx): + if not isinstance(program_outline[start_idx], BeginBlockItem): + yield start_idx, program_outline[start_idx] level = 0 i = start_idx - while i < len(schedule): - sched_item = schedule[i] + while i < len(program_outline): + sched_item = program_outline[i] if isinstance(sched_item, BeginBlockItem): level += 1 @@ -149,11 +149,11 @@ def generate_sub_sched_items(schedule, start_idx): assert False -def get_insn_ids_for_block_at(schedule, start_idx): +def get_insn_ids_for_block_at(program_outline, start_idx): return frozenset( sub_sched_item.insn_id for i, sub_sched_item in generate_sub_sched_items( - schedule, start_idx) + program_outline, start_idx) if isinstance(sub_sched_item, RunInstruction)) @@ -161,7 +161,7 @@ def find_active_inames_at(kernel, sched_index): active_inames = [] from loopy.schedule import EnterLoop, LeaveLoop - for sched_item in kernel.schedule[:sched_index]: + for sched_item in kernel.program_outline[:sched_index]: if isinstance(sched_item, EnterLoop): active_inames.append(sched_item.iname) if isinstance(sched_item, LeaveLoop): @@ -171,11 +171,11 @@ def find_active_inames_at(kernel, sched_index): def has_barrier_within(kernel, sched_index): - sched_item = kernel.schedule[sched_index] + sched_item = kernel.program_outline[sched_index] if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( - kernel.schedule, sched_index) + kernel.program_outline, sched_index) from pytools import any return any(isinstance(subsched_item, Barrier) for subsched_item in loop_contents) @@ -186,11 +186,11 @@ def has_barrier_within(kernel, sched_index): def 
find_used_inames_within(kernel, sched_index): - sched_item = kernel.schedule[sched_index] + sched_item = kernel.program_outline[sched_index] if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( - kernel.schedule, sched_index) + kernel.program_outline, sched_index) run_insns = [subsched_item for subsched_item in loop_contents if isinstance(subsched_item, RunInstruction)] @@ -456,12 +456,12 @@ def format_insn(kernel, insn_id): Fore.CYAN, str(insn), Style.RESET_ALL) -def dump_schedule(kernel, schedule): +def dump_schedule(kernel, program_outline): lines = [] indent = "" from loopy.kernel.data import MultiAssignmentBase - for sched_item in schedule: + for sched_item in program_outline: if isinstance(sched_item, EnterLoop): lines.append(indent + "for %s" % sched_item.iname) indent += " " @@ -523,13 +523,13 @@ class ScheduleDebugger: sys.stdout.flush() self.wrote_status = 2 - def log_success(self, schedule): + def log_success(self, program_outline): self.success_counter += 1 self.update() - def log_dead_end(self, schedule): - if len(schedule) > len(self.longest_rejected_schedule): - self.longest_rejected_schedule = schedule + def log_dead_end(self, program_outline): + if len(program_outline) > len(self.longest_rejected_schedule): + self.longest_rejected_schedule = program_outline self.dead_end_counter += 1 self.update() @@ -598,7 +598,7 @@ class SchedulerState(ImmutableRecord): The inames of the last entered subkernel - .. attribute:: schedule + .. attribute:: program_outline .. attribute:: scheduled_insn_ids @@ -607,7 +607,7 @@ class SchedulerState(ImmutableRecord): .. attribute:: preschedule A sequence of schedule items that must be inserted into the - schedule, maintaining the same relative ordering. Newly scheduled + program_outline, maintaining the same relative ordering. Newly scheduled items may interleave this sequence. .. 
attribute:: prescheduled_insn_ids @@ -677,7 +677,7 @@ def generate_loop_schedules_internal( if debug is not None: if (debug.debug_length is not None - and len(sched_state.schedule) >= debug.debug_length): + and len(sched_state.program_outline) >= debug.debug_length): debug_mode = True if debug_mode: @@ -688,7 +688,7 @@ def generate_loop_schedules_internal( print(kernel.stringify(with_dependencies=True)) print(75*"=") print("CURRENT SCHEDULE:") - print(dump_schedule(sched_state.kernel, sched_state.schedule)) + print(dump_schedule(sched_state.kernel, sched_state.program_outline)) if sched_state.preschedule: print(75*"=") print("PRESCHEDULED ITEMS AWAITING SCHEDULING:") @@ -703,7 +703,7 @@ def generate_loop_schedules_internal( if debug.debug_length == len(debug.longest_rejected_schedule): print("WHY IS THIS A DEAD-END SCHEDULE?") - #if len(schedule) == 2: + #if len(program_outline) == 2: #from pudb import set_trace; set_trace() # }}} @@ -714,7 +714,8 @@ def generate_loop_schedules_internal( assert sched_state.within_subkernel is False for result in generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + program_outline=sched_state.program_outline + ( + next_preschedule_item,), preschedule=sched_state.preschedule[1:], within_subkernel=True, may_schedule_global_barriers=False, @@ -729,7 +730,8 @@ def generate_loop_schedules_internal( if sched_state.active_inames == sched_state.enclosing_subkernel_inames: for result in generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + program_outline=sched_state.program_outline + ( + next_preschedule_item,), preschedule=sched_state.preschedule[1:], within_subkernel=False, may_schedule_global_barriers=True), @@ -750,7 +752,8 @@ def generate_loop_schedules_internal( and next_preschedule_item.originating_insn_id is None): for result in generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + 
(next_preschedule_item,), + program_outline=sched_state.program_outline + ( + next_preschedule_item,), preschedule=sched_state.preschedule[1:]), allow_boost=rec_allow_boost, debug=debug): @@ -930,8 +933,9 @@ def generate_loop_schedules_internal( scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, - schedule=( - sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + program_outline=( + sched_state.program_outline + ( + RunInstruction(insn_id=insn.id),)), preschedule=( sched_state.preschedule if insn_id not in sched_state.prescheduled_insn_ids @@ -997,7 +1001,7 @@ def generate_loop_schedules_internal( last_entered_loop not in subdep.boostable_into): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " - "which the schedule is " + "which the program_outline is " "currently stuck inside of, seems mis-nested. " "'%(subdep)s' must occur " "before '%(dep)s', " "but '%(subdep)s must be outside " @@ -1026,7 +1030,7 @@ def generate_loop_schedules_internal( seen_an_insn = False ignore_count = 0 - for sched_item in sched_state.schedule[::-1]: + for sched_item in sched_state.program_outline[::-1]: if isinstance(sched_item, RunInstruction): seen_an_insn = True elif isinstance(sched_item, LeaveLoop): @@ -1044,8 +1048,8 @@ def generate_loop_schedules_internal( for sub_sched in generate_loop_schedules_internal( sched_state.copy( - schedule=( - sched_state.schedule + program_outline=( + sched_state.program_outline + (LeaveLoop(iname=last_entered_loop),)), active_inames=sched_state.active_inames[:-1], preschedule=( @@ -1188,7 +1192,7 @@ def generate_loop_schedules_internal( # {{{ tier building - # Build priority tiers. If a schedule is found in the first tier, then + # Build priority tiers. If a program_outline is found in the first tier, then # loops in the second are not even tried (and so on). 
loop_priority_set = set().union(*[set(prio) for prio in @@ -1250,14 +1254,14 @@ def generate_loop_schedules_internal( key=lambda iname: ( iname_to_usefulness.get(iname, 0), # Sort by iname to achieve deterministic - # ordering of generated schedules. + # ordering of generated program_outlines. iname), reverse=True): for sub_sched in generate_loop_schedules_internal( sched_state.copy( - schedule=( - sched_state.schedule + program_outline=( + sched_state.program_outline + (EnterLoop(iname=iname),)), active_inames=( sched_state.active_inames + (iname,)), @@ -1281,8 +1285,8 @@ def generate_loop_schedules_internal( if debug_mode: print(75*"=") - inp = six.moves.input("Hit Enter for next schedule, " - "or enter a number to examine schedules of a " + inp = six.moves.input("Hit Enter for next program_outline, " + "or enter a number to examine program_outlines of a " "different length:") if inp: raise ScheduleDebugInput(inp) @@ -1292,7 +1296,7 @@ def generate_loop_schedules_internal( and not sched_state.unscheduled_insn_ids and not sched_state.preschedule): # if done, yield result - debug.log_success(sched_state.schedule) + debug.log_success(sched_state.program_outline) for boost_insn_id, boost_inames in sched_state.uses_of_boostability: warn_with_kernel( @@ -1303,7 +1307,7 @@ def generate_loop_schedules_internal( % (boost_insn_id, ", ".join(boost_inames)), DeprecationWarning) - yield sched_state.schedule + yield sched_state.program_outline else: if not allow_boost and allow_boost is not None: @@ -1315,18 +1319,18 @@ def generate_loop_schedules_internal( else: # dead end if debug is not None: - debug.log_dead_end(sched_state.schedule) + debug.log_dead_end(sched_state.program_outline) # }}} # {{{ convert barrier instructions to proper barriers -def convert_barrier_instructions_to_barriers(kernel, schedule): +def convert_barrier_instructions_to_barriers(kernel, program_outline): from loopy.kernel.instruction import BarrierInstruction result = [] - for sched_item in schedule: 
+ for sched_item in program_outline: if isinstance(sched_item, RunInstruction): insn = kernel.id_to_insn[sched_item.insn_id] if isinstance(insn, BarrierInstruction): @@ -1585,17 +1589,17 @@ def barrier_kind_more_or_equally_global(kind1, kind2): return (kind1 == kind2) or (kind1 == "global" and kind2 == "local") -def insn_ids_reaching_end_without_intervening_barrier(schedule, kind): - return _insn_ids_reaching_end(schedule, kind, reverse=False) +def insn_ids_reaching_end_without_intervening_barrier(program_outline, kind): + return _insn_ids_reaching_end(program_outline, kind, reverse=False) -def insn_ids_reachable_from_start_without_intervening_barrier(schedule, kind): - return _insn_ids_reaching_end(schedule, kind, reverse=True) +def insn_ids_reachable_from_start_without_intervening_barrier(program_outline, kind): + return _insn_ids_reaching_end(program_outline, kind, reverse=True) -def _insn_ids_reaching_end(schedule, kind, reverse): +def _insn_ids_reaching_end(program_outline, kind, reverse): if reverse: - schedule = reversed(schedule) + program_outline = reversed(program_outline) enter_scope_item_kind = LeaveLoop leave_scope_item_kind = EnterLoop else: @@ -1604,7 +1608,7 @@ def _insn_ids_reaching_end(schedule, kind, reverse): insn_ids_alive_at_scope = [set()] - for sched_item in schedule: + for sched_item in program_outline: if isinstance(sched_item, enter_scope_item_kind): insn_ids_alive_at_scope.append(set()) elif isinstance(sched_item, leave_scope_item_kind): @@ -1644,7 +1648,7 @@ def _insn_ids_reaching_end(schedule, kind, reverse): return insn_ids_alive_at_scope[-1] -def append_barrier_or_raise_error(schedule, dep, verify_only): +def append_barrier_or_raise_error(program_outline, dep, verify_only): if verify_only: from loopy.diagnostic import MissingBarrierError raise MissingBarrierError( @@ -1662,14 +1666,15 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): comment = "for %s (%s)" % ( dep.variable, dep.dep_descr.format( tgt=dep.target.id, 
src=dep.source.id)) - schedule.append(Barrier( + program_outline.append(Barrier( comment=comment, synchronization_kind=dep.var_kind, mem_kind=dep.var_kind, originating_insn_id=None)) -def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0): +def insert_barriers( + kernel, program_outline, synchronization_kind, verify_only, level=0): """ :arg synchronization_kind: "local" or "global". The :attr:`Barrier.synchronization_kind` to be inserted. Generally, this @@ -1682,26 +1687,26 @@ def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0 # {{{ insert barriers at outermost scheduling level - def insert_barriers_at_outer_level(schedule, reverse=False): + def insert_barriers_at_outer_level(program_outline, reverse=False): dep_tracker = DependencyTracker(kernel, var_kind=synchronization_kind, reverse=reverse) if reverse: # Populate the dependency tracker with sources from the tail end of - # the schedule block. + # the program_outline block. for insn_id in ( insn_ids_reaching_end_without_intervening_barrier( - schedule, synchronization_kind)): + program_outline, synchronization_kind)): dep_tracker.add_source(insn_id) result = [] i = 0 - while i < len(schedule): - sched_item = schedule[i] + while i < len(program_outline): + sched_item = program_outline[i] if isinstance(sched_item, EnterLoop): - subloop, new_i = gather_schedule_block(schedule, i) + subloop, new_i = gather_schedule_block(program_outline, i) loop_head = ( insn_ids_reachable_from_start_without_intervening_barrier( @@ -1768,7 +1773,7 @@ def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0 i += 1 else: - raise ValueError("unexpected schedule item type '%s'" + raise ValueError("unexpected program_outline item type '%s'" % type(sched_item).__name__) return result @@ -1779,11 +1784,11 @@ def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0 result = [] i = 0 - while i < len(schedule): - sched_item = schedule[i] 
+ while i < len(program_outline): + sched_item = program_outline[i] if isinstance(sched_item, EnterLoop): - subloop, new_i = gather_schedule_block(schedule, i) + subloop, new_i = gather_schedule_block(program_outline, i) new_subloop = insert_barriers( kernel, subloop[1:-1], synchronization_kind, verify_only, level + 1) @@ -1798,7 +1803,7 @@ def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0 i += 1 else: - raise ValueError("unexpected schedule item type '%s'" + raise ValueError("unexpected program_outline item type '%s'" % type(sched_item).__name__) # }}} @@ -1848,11 +1853,12 @@ def generate_loop_schedules_inner(kernel, debug_args={}): from loopy.check import pre_schedule_checks pre_schedule_checks(kernel) - schedule_count = 0 + program_outline_count = 0 debug = ScheduleDebugger(**debug_args) - preschedule = kernel.schedule if kernel.state == KernelState.SCHEDULED else () + preschedule = kernel.program_outline if ( + kernel.state == KernelState.SCHEDULED) else () prescheduled_inames = set( insn.iname @@ -1900,7 +1906,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): entered_inames=frozenset(), enclosing_subkernel_inames=(), - schedule=(), + program_outline=(), unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), scheduled_insn_ids=frozenset(), @@ -1972,7 +1978,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): logger.debug("%s: barrier insertion: done" % kernel.name) new_kernel = kernel.copy( - schedule=gen_sched, + program_outline=gen_sched, state=KernelState.SCHEDULED) from loopy.schedule.device_mapping import \ @@ -1987,7 +1993,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug.start() - schedule_count += 1 + program_outline_count += 1 except KeyboardInterrupt: print() @@ -1998,14 +2004,14 @@ def generate_loop_schedules_inner(kernel, debug_args={}): raise debug.done_scheduling() - if not schedule_count: + if not program_outline_count: print(75*"-") - print("ERROR: Sorry--loo.py 
did not find a schedule for your kernel.") + print("ERROR: Sorry--loo.py did not find a program_outline for your kernel.") print(75*"-") print_longest_dead_end() - raise RuntimeError("no valid schedules found") + raise RuntimeError("no valid program_outlines found") - logger.info("%s: schedule done" % kernel.name) + logger.info("%s: program_outline done" % kernel.name) # }}} @@ -2038,13 +2044,13 @@ def get_one_scheduled_kernel(kernel): try: result = schedule_cache[sched_cache_key] - logger.debug("%s: schedule cache hit" % kernel.name) + logger.debug("%s: program_outline cache hit" % kernel.name) from_cache = True except KeyError: pass if not from_cache: - with ProcessLogger(logger, "%s: schedule" % kernel.name): + with ProcessLogger(logger, "%s: program_outline" % kernel.name): with MinRecursionLimitForScheduling(kernel): result = _get_one_scheduled_kernel_inner(kernel) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 59afb07d2..235326664 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -41,13 +41,13 @@ def map_schedule_onto_host_or_device(kernel): + kernel.target.device_program_name_suffix) if not kernel.target.split_kernel_at_global_barriers(): - new_schedule = ( + new_program_outline = ( [CallKernel(kernel_name=device_prog_name_gen(), extra_args=[], extra_inames=[])] + - list(kernel.schedule) + + list(kernel.program_outline) + [ReturnFromKernel(kernel_name=kernel.name)]) - kernel = kernel.copy(schedule=new_schedule) + kernel = kernel.copy(program_outline=new_program_outline) else: kernel = map_schedule_onto_host_or_device_impl( kernel, device_prog_name_gen) @@ -56,21 +56,21 @@ def map_schedule_onto_host_or_device(kernel): def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): - schedule = kernel.schedule - loop_bounds = get_block_boundaries(schedule) + program_outline = kernel.program_outline + loop_bounds = get_block_boundaries(program_outline) # {{{ inner mapper 
function dummy_call = CallKernel(kernel_name="", extra_args=[], extra_inames=[]) dummy_return = ReturnFromKernel(kernel_name="") - def inner_mapper(start_idx, end_idx, new_schedule): - schedule_required_splitting = False + def inner_mapper(start_idx, end_idx, new_program_outline): + program_outline_required_splitting = False i = start_idx current_chunk = [] while i <= end_idx: - sched_item = schedule[i] + sched_item = program_outline[i] if isinstance(sched_item, RunInstruction): current_chunk.append(sched_item) @@ -78,43 +78,43 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): elif isinstance(sched_item, EnterLoop): loop_end = loop_bounds[i] - inner_schedule = [] + inner_program_outline = [] loop_required_splitting = inner_mapper( - i + 1, loop_end - 1, inner_schedule) + i + 1, loop_end - 1, inner_program_outline) - start_item = schedule[i] - end_item = schedule[loop_end] + start_item = program_outline[i] + end_item = program_outline[loop_end] i = loop_end + 1 if loop_required_splitting: - schedule_required_splitting = True + program_outline_required_splitting = True if current_chunk: - new_schedule.extend( + new_program_outline.extend( [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) - new_schedule.extend( + new_program_outline.extend( [start_item] + - inner_schedule + + inner_program_outline + [end_item]) current_chunk = [] else: current_chunk.extend( [start_item] + - inner_schedule + + inner_program_outline + [end_item]) elif isinstance(sched_item, Barrier): if sched_item.synchronization_kind == "global": # Wrap the current chunk into a kernel call. 
- schedule_required_splitting = True + program_outline_required_splitting = True if current_chunk: - new_schedule.extend( + new_program_outline.extend( [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) - new_schedule.append(sched_item) + new_program_outline.append(sched_item) current_chunk = [] else: current_chunk.append(sched_item) @@ -123,45 +123,45 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): raise LoopyError("unexpected type of schedule item: %s" % type(sched_item).__name__) - if current_chunk and schedule_required_splitting: - # Wrap remainder of schedule into a kernel call. - new_schedule.extend( + if current_chunk and program_outline_required_splitting: + # Wrap remainder of program_outline into a kernel call. + new_program_outline.extend( [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) else: - new_schedule.extend(current_chunk) + new_program_outline.extend(current_chunk) - return schedule_required_splitting + return program_outline_required_splitting # }}} - new_schedule = [] - split_kernel = inner_mapper(0, len(schedule) - 1, new_schedule) + new_program_outline = [] + split_kernel = inner_mapper(0, len(program_outline) - 1, new_program_outline) if not split_kernel: # Wrap everything into a kernel call. 
- new_schedule = ( + new_program_outline = ( [dummy_call.copy()] + - new_schedule + + new_program_outline + [dummy_return.copy()]) # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions inames = [] - for idx, sched_item in enumerate(new_schedule): + for idx, sched_item in enumerate(new_program_outline): if isinstance(sched_item, CallKernel): last_kernel_name = device_prog_name_gen() - new_schedule[idx] = sched_item.copy( + new_program_outline[idx] = sched_item.copy( kernel_name=last_kernel_name, extra_inames=list(inames)) elif isinstance(sched_item, ReturnFromKernel): - new_schedule[idx] = sched_item.copy( + new_program_outline[idx] = sched_item.copy( kernel_name=last_kernel_name) elif isinstance(sched_item, EnterLoop): inames.append(sched_item.iname) elif isinstance(sched_item, LeaveLoop): inames.pop() - new_kernel = kernel.copy(schedule=new_schedule) + new_kernel = kernel.copy(program_outline=new_program_outline) return new_kernel diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index e0129fd98..1ac3587e2 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -27,7 +27,7 @@ from loopy.kernel.data import AddressSpace # {{{ block boundary finder -def get_block_boundaries(schedule): +def get_block_boundaries(program_outline): """ Return a dictionary mapping indices of :class:`loopy.schedule.BlockBeginItem`s to @@ -36,7 +36,7 @@ def get_block_boundaries(schedule): from loopy.schedule import (BeginBlockItem, EndBlockItem) block_bounds = {} active_blocks = [] - for idx, sched_item in enumerate(schedule): + for idx, sched_item in enumerate(program_outline): if isinstance(sched_item, BeginBlockItem): active_blocks.append(idx) elif isinstance(sched_item, EndBlockItem): @@ -75,12 +75,12 @@ def temporaries_written_in_subkernel(kernel, subkernel): def add_extra_args_to_schedule(kernel): """ Fill the `extra_args` fields in all the :class:`loopy.schedule.CallKernel` - instructions in the schedule with global temporaries. 
+ instructions in the program_outline with global temporaries. """ - new_schedule = [] + new_program_outline = [] from loopy.schedule import CallKernel - for sched_item in kernel.schedule: + for sched_item in kernel.program_outline: if isinstance(sched_item, CallKernel): subkernel = sched_item.kernel_name @@ -98,11 +98,11 @@ def add_extra_args_to_schedule(kernel): and tv not in sched_item.extra_args) - new_schedule.append(sched_item.copy( + new_program_outline.append(sched_item.copy( extra_args=sched_item.extra_args + sorted(more_args))) else: - new_schedule.append(sched_item) + new_program_outline.append(sched_item) - return kernel.copy(schedule=new_schedule) + return kernel.copy(program_outline=new_program_outline) # }}} diff --git a/loopy/statistics.py b/loopy/statistics.py index 10d29daad..601c8992c 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1740,7 +1740,7 @@ def get_synchronization_map(knl, subgroup_size=None): else: return one - for sched_item in knl.schedule: + for sched_item in knl.program_outline: if isinstance(sched_item, EnterLoop): if sched_item.iname: # (if not empty) iname_list.append(sched_item.iname) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 01d26dd82..4e9048c10 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -515,7 +515,7 @@ class CFamilyASTBuilder(ASTBuilderBase): # whether this is the first device program in the schedule. 
is_first_dev_prog = codegen_state.is_generating_device_code for i in range(schedule_index): - if isinstance(kernel.schedule[i], CallKernel): + if isinstance(kernel.program_outline[i], CallKernel): is_first_dev_prog = False break if is_first_dev_prog: @@ -604,7 +604,7 @@ class CFamilyASTBuilder(ASTBuilderBase): from loopy.schedule.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - subkernel = kernel.schedule[schedule_index].kernel_name + subkernel = kernel.program_outline[schedule_index].kernel_name sub_knl_temps = ( temporaries_read_in_subkernel(kernel, subkernel) | temporaries_written_in_subkernel(kernel, subkernel)) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 50fd1026f..fcf15ee78 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -249,7 +249,7 @@ class CUDACASTBuilder(CFamilyASTBuilder): _, local_grid_size = \ codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( - codegen_state.kernel.schedule, schedule_index)) + codegen_state.kernel.program_outline, schedule_index)) from loopy.symbolic import get_dependencies if not get_dependencies(local_grid_size): diff --git a/loopy/target/execution.py b/loopy/target/execution.py index c8f0d4090..6d31885ae 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -754,7 +754,7 @@ class KernelExecutorBase(object): from loopy.type_inference import infer_unknown_types kernel = infer_unknown_types(kernel, expect_completion=True) - if kernel.schedule is None: + if kernel.program_outline is None: from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 04d436043..8ca0df285 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -407,7 +407,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): from loopy.schedule import get_insn_ids_for_block_at _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( 
get_insn_ids_for_block_at( - codegen_state.kernel.schedule, schedule_index)) + codegen_state.kernel.program_outline, schedule_index)) from loopy.symbolic import get_dependencies if not get_dependencies(local_sizes): diff --git a/loopy/transform/save.py b/loopy/transform/save.py index baa558a72..784fb72f6 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -64,17 +64,17 @@ class LivenessAnalysis(object): def __init__(self, kernel): self.kernel = kernel - self.schedule = self.kernel.schedule + self.program_outline = self.kernel.program_outline @memoize_method def get_successor_relation(self): successors = {} - block_bounds = get_block_boundaries(self.kernel.schedule) + block_bounds = get_block_boundaries(self.kernel.program_outline) for idx, (item, next_item) in enumerate(zip( - reversed(self.schedule), - reversed(self.schedule + [None]))): - sched_idx = len(self.schedule) - idx - 1 + reversed(self.program_outline), + reversed(self.program_outline + [None]))): + sched_idx = len(self.program_outline) - idx - 1 # Look at next_item if next_item is None: @@ -105,10 +105,10 @@ class LivenessAnalysis(object): return successors def get_gen_and_kill_sets(self): - gen = dict((idx, set()) for idx in range(len(self.schedule))) - kill = dict((idx, set()) for idx in range(len(self.schedule))) + gen = dict((idx, set()) for idx in range(len(self.program_outline))) + kill = dict((idx, set()) for idx in range(len(self.program_outline))) - for sched_idx, sched_item in enumerate(self.schedule): + for sched_idx, sched_item in enumerate(self.program_outline): if not isinstance(sched_item, RunInstruction): continue insn = self.kernel.id_to_insn[sched_item.insn_id] @@ -141,14 +141,14 @@ class LivenessAnalysis(object): gen, kill = self.get_gen_and_kill_sets() # Fixed point iteration for liveness analysis - lr = LivenessResult.make_empty(len(self.schedule)) + lr = LivenessResult.make_empty(len(self.program_outline)) prev_lr = None while prev_lr != lr: from copy import 
deepcopy prev_lr = deepcopy(lr) - for idx in range(len(self.schedule) - 1, -1, -1): + for idx in range(len(self.program_outline) - 1, -1, -1): for succ in successors[idx]: lr[idx].live_out.update(lr[succ].live_in) lr[idx].live_in = gen[idx] | (lr[idx].live_out - kill[idx]) @@ -160,13 +160,13 @@ class LivenessAnalysis(object): def print_liveness(self): print(75 * "-") print("LIVE IN:") - for sched_idx, sched_item in enumerate(self.schedule): + for sched_idx, sched_item in enumerate(self.program_outline): print("{item}: {{{vars}}}".format( item=sched_idx, vars=", ".join(sorted(self[sched_idx].live_in)))) print(75 * "-") print("LIVE OUT:") - for sched_idx, sched_item in enumerate(self.schedule): + for sched_idx, sched_item in enumerate(self.program_outline): print("{item}: {{{vars}}}".format( item=sched_idx, vars=", ".join(sorted(self[sched_idx].live_out)))) @@ -316,7 +316,7 @@ class TemporarySaver(object): def subkernel_to_slice_indices(self): result = {} - for sched_item_idx, sched_item in enumerate(self.kernel.schedule): + for sched_item_idx, sched_item in enumerate(self.kernel.program_outline): if isinstance(sched_item, CallKernel): start_idx = sched_item_idx elif isinstance(sched_item, ReturnFromKernel): @@ -331,7 +331,7 @@ class TemporarySaver(object): within_subkernel = False result = {} - for sched_item_idx, sched_item in enumerate(self.kernel.schedule): + for sched_item_idx, sched_item in enumerate(self.kernel.program_outline): if isinstance(sched_item, CallKernel): within_subkernel = True result[sched_item.kernel_name] = frozenset(current_outer_inames) @@ -356,14 +356,14 @@ class TemporarySaver(object): try: pre_barrier = next(item for item in - self.kernel.schedule[subkernel_start::-1] + self.kernel.program_outline[subkernel_start::-1] if is_global_barrier(item)).originating_insn_id except StopIteration: pre_barrier = None try: post_barrier = next(item for item in - self.kernel.schedule[subkernel_end:] + self.kernel.program_outline[subkernel_end:] if 
is_global_barrier(item)).originating_insn_id except StopIteration: post_barrier = None @@ -751,7 +751,7 @@ def save_and_reload_temporaries(knl): from loopy.schedule.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - for sched_idx, sched_item in enumerate(knl.schedule): + for sched_idx, sched_item in enumerate(knl.program_outline): if isinstance(sched_item, CallKernel): # Any written temporary that is live-out needs to be read into @@ -771,7 +771,7 @@ def save_and_reload_temporaries(knl): saver.reload(temporary, sched_item.kernel_name) elif isinstance(sched_item, ReturnFromKernel): - if sched_idx == len(knl.schedule) - 1: + if sched_idx == len(knl.program_outline) - 1: # Kernel exit: nothing live interesting_temporaries = set() else: diff --git a/test/test_loopy.py b/test/test_loopy.py index 6b78ac26b..6f9af8c47 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2214,7 +2214,7 @@ def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): seen_barrier = False loop_level = 0 - for sched_item in knl.schedule: + for sched_item in knl.program_outline: if isinstance(sched_item, RunInstruction): if sched_item.insn_id == id1: watch_for_barrier = True -- GitLab From dea8fe606368e855890e11d5fff6358e52fdccf6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 Feb 2020 14:02:44 -0600 Subject: [PATCH 02/56] renamed preschedule->preoutline --- loopy/schedule/__init__.py | 132 ++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 8a29d1825..04339b5ff 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -604,19 +604,19 @@ class SchedulerState(ImmutableRecord): .. attribute:: unscheduled_insn_ids - .. attribute:: preschedule + .. attribute:: preoutline A sequence of schedule items that must be inserted into the program_outline, maintaining the same relative ordering. 
Newly scheduled items may interleave this sequence. - .. attribute:: prescheduled_insn_ids + .. attribute:: preoutlined_insn_ids - A :class:`frozenset` of any instruction that started prescheduled + A :class:`frozenset` of any instruction that started preoutlined - .. attribute:: prescheduled_inames + .. attribute:: preoutlined_inames - A :class:`frozenset` of any iname that started prescheduled + A :class:`frozenset` of any iname that started preoutlined .. attribute:: may_schedule_global_barriers @@ -666,9 +666,9 @@ def generate_loop_schedules_internal( active_inames_set = frozenset(sched_state.active_inames) - next_preschedule_item = ( - sched_state.preschedule[0] - if len(sched_state.preschedule) > 0 + next_preoutline_item = ( + sched_state.preoutline[0] + if len(sched_state.preoutline) > 0 else None) # {{{ decide about debug mode @@ -689,10 +689,10 @@ def generate_loop_schedules_internal( print(75*"=") print("CURRENT SCHEDULE:") print(dump_schedule(sched_state.kernel, sched_state.program_outline)) - if sched_state.preschedule: + if sched_state.preoutline: print(75*"=") - print("PRESCHEDULED ITEMS AWAITING SCHEDULING:") - print(dump_schedule(sched_state.kernel, sched_state.preschedule)) + print("PREOUTLINED ITEMS AWAITING SCHEDULING:") + print(dump_schedule(sched_state.kernel, sched_state.preoutline)) #print("boost allowed:", allow_boost) print(75*"=") print("LOOP NEST MAP (inner: outer):") @@ -708,15 +708,15 @@ def generate_loop_schedules_internal( # }}} - # {{{ see if we have reached the start/end of kernel in the preschedule + # {{{ see if we have reached the start/end of kernel in the preoutline - if isinstance(next_preschedule_item, CallKernel): + if isinstance(next_preoutline_item, CallKernel): assert sched_state.within_subkernel is False for result in generate_loop_schedules_internal( sched_state.copy( program_outline=sched_state.program_outline + ( - next_preschedule_item,), - preschedule=sched_state.preschedule[1:], + next_preoutline_item,), + 
preoutline=sched_state.preoutline[1:], within_subkernel=True, may_schedule_global_barriers=False, enclosing_subkernel_inames=sched_state.active_inames), @@ -724,15 +724,15 @@ def generate_loop_schedules_internal( debug=debug): yield result - if isinstance(next_preschedule_item, ReturnFromKernel): + if isinstance(next_preoutline_item, ReturnFromKernel): assert sched_state.within_subkernel is True # Make sure all subkernel inames have finished. if sched_state.active_inames == sched_state.enclosing_subkernel_inames: for result in generate_loop_schedules_internal( sched_state.copy( program_outline=sched_state.program_outline + ( - next_preschedule_item,), - preschedule=sched_state.preschedule[1:], + next_preoutline_item,), + preoutline=sched_state.preoutline[1:], within_subkernel=False, may_schedule_global_barriers=True), allow_boost=rec_allow_boost, @@ -741,20 +741,20 @@ def generate_loop_schedules_internal( # }}} - # {{{ see if there are pending barriers in the preschedule + # {{{ see if there are pending barriers in the preoutline # Barriers that do not have an originating instruction are handled here. # (These are automatically inserted by insert_barriers().) Barriers with # originating instructions are handled as part of normal instruction # scheduling below. 
if ( - isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.originating_insn_id is None): + isinstance(next_preoutline_item, Barrier) + and next_preoutline_item.originating_insn_id is None): for result in generate_loop_schedules_internal( sched_state.copy( program_outline=sched_state.program_outline + ( - next_preschedule_item,), - preschedule=sched_state.preschedule[1:]), + next_preoutline_item,), + preoutline=sched_state.preoutline[1:]), allow_boost=rec_allow_boost, debug=debug): yield result @@ -779,15 +779,15 @@ def generate_loop_schedules_internal( # Use previous instruction sorting result if it is available if sched_state.insn_ids_to_try is None: insn_ids_to_try = sorted( - # Non-prescheduled instructions go first. - sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, + # Non-preoutlined instructions go first. + sched_state.unscheduled_insn_ids - sched_state.preoutlined_insn_ids, key=insn_sort_key, reverse=True) else: insn_ids_to_try = sched_state.insn_ids_to_try insn_ids_to_try.extend( insn_id - for item in sched_state.preschedule + for item in sched_state.preoutline for insn_id in sched_item_to_insn_id(item)) for insn_id in insn_ids_to_try: @@ -829,20 +829,20 @@ def generate_loop_schedules_internal( print("instruction '%s' won't work under inames '%s'" % (format_insn(kernel, insn.id), ",".join(have-want))) - # {{{ check if scheduling this insn is compatible with preschedule + # {{{ check if scheduling this insn is compatible with preoutline - if insn_id in sched_state.prescheduled_insn_ids: - if isinstance(next_preschedule_item, RunInstruction): - next_preschedule_insn_id = next_preschedule_item.insn_id - elif isinstance(next_preschedule_item, Barrier): - assert next_preschedule_item.originating_insn_id is not None - next_preschedule_insn_id = next_preschedule_item.originating_insn_id + if insn_id in sched_state.preoutlined_insn_ids: + if isinstance(next_preoutline_item, RunInstruction): + next_preoutline_insn_id = 
next_preoutline_item.insn_id + elif isinstance(next_preoutline_item, Barrier): + assert next_preoutline_item.originating_insn_id is not None + next_preoutline_insn_id = next_preoutline_item.originating_insn_id else: - next_preschedule_insn_id = None + next_preoutline_insn_id = None - if next_preschedule_insn_id != insn_id: + if next_preoutline_insn_id != insn_id: if debug_mode: - print("can't schedule '%s' because another preschedule " + print("can't schedule '%s' because another preoutline " "instruction precedes it" % format_insn(kernel, insn.id)) is_ready = False @@ -936,10 +936,10 @@ def generate_loop_schedules_internal( program_outline=( sched_state.program_outline + ( RunInstruction(insn_id=insn.id),)), - preschedule=( - sched_state.preschedule - if insn_id not in sched_state.prescheduled_insn_ids - else sched_state.preschedule[1:]), + preoutline=( + sched_state.preoutline + if insn_id not in sched_state.preoutlined_insn_ids + else sched_state.preoutline[1:]), active_group_counts=new_active_group_counts, uses_of_boostability=( sched_state.uses_of_boostability @@ -969,13 +969,13 @@ def generate_loop_schedules_internal( can_leave = True if ( - last_entered_loop in sched_state.prescheduled_inames + last_entered_loop in sched_state.preoutlined_inames and not ( - isinstance(next_preschedule_item, LeaveLoop) - and next_preschedule_item.iname == last_entered_loop)): - # A prescheduled loop can only be left if the preschedule agrees. + isinstance(next_preoutline_item, LeaveLoop) + and next_preoutline_item.iname == last_entered_loop)): + # A preoutlined loop can only be left if the preoutline agrees. 
if debug_mode: - print("cannot leave '%s' because of preschedule constraints" + print("cannot leave '%s' because of preoutline constraints" % last_entered_loop) can_leave = False elif last_entered_loop not in sched_state.breakable_inames: @@ -1052,11 +1052,11 @@ def generate_loop_schedules_internal( sched_state.program_outline + (LeaveLoop(iname=last_entered_loop),)), active_inames=sched_state.active_inames[:-1], - preschedule=( - sched_state.preschedule + preoutline=( + sched_state.preoutline if last_entered_loop - not in sched_state.prescheduled_inames - else sched_state.preschedule[1:]), + not in sched_state.preoutlined_inames + else sched_state.preoutline[1:]), ), allow_boost=rec_allow_boost, debug=debug): yield sub_sched @@ -1098,12 +1098,12 @@ def generate_loop_schedules_internal( # {{{ check if scheduling this iname now is allowed/plausible if ( - iname in sched_state.prescheduled_inames + iname in sched_state.preoutlined_inames and not ( - isinstance(next_preschedule_item, EnterLoop) - and next_preschedule_item.iname == iname)): + isinstance(next_preoutline_item, EnterLoop) + and next_preoutline_item.iname == iname)): if debug_mode: - print("scheduling %s prohibited by preschedule constraints" + print("scheduling %s prohibited by preoutline constraints" % iname) continue @@ -1268,10 +1268,10 @@ def generate_loop_schedules_internal( entered_inames=( sched_state.entered_inames | frozenset((iname,))), - preschedule=( - sched_state.preschedule - if iname not in sched_state.prescheduled_inames - else sched_state.preschedule[1:]), + preoutline=( + sched_state.preoutline + if iname not in sched_state.preoutlined_inames + else sched_state.preoutline[1:]), ), allow_boost=rec_allow_boost, debug=debug): @@ -1294,7 +1294,7 @@ def generate_loop_schedules_internal( if ( not sched_state.active_inames and not sched_state.unscheduled_insn_ids - and not sched_state.preschedule): + and not sched_state.preoutline): # if done, yield result 
debug.log_success(sched_state.program_outline) @@ -1857,17 +1857,17 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug = ScheduleDebugger(**debug_args) - preschedule = kernel.program_outline if ( + preoutline = kernel.program_outline if ( kernel.state == KernelState.SCHEDULED) else () - prescheduled_inames = set( + preoutlined_inames = set( insn.iname - for insn in preschedule + for insn in preoutline if isinstance(insn, EnterLoop)) - prescheduled_insn_ids = set( + preoutlined_insn_ids = set( insn_id - for item in preschedule + for item in preoutline for insn_id in sched_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, @@ -1898,8 +1898,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): ilp_inames=ilp_inames, vec_inames=vec_inames, - prescheduled_inames=prescheduled_inames, - prescheduled_insn_ids=prescheduled_insn_ids, + preoutlined_inames=preoutlined_inames, + preoutlined_insn_ids=preoutlined_insn_ids, # time-varying part active_inames=(), @@ -1913,7 +1913,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): within_subkernel=kernel.state != KernelState.SCHEDULED, may_schedule_global_barriers=True, - preschedule=preschedule, + preoutline=preoutline, insn_ids_to_try=None, # ilp and vec are not parallel for the purposes of the scheduler -- GitLab From da5e1218bc03f5d464f8142667f1010f761fec56 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 Feb 2020 14:07:46 -0600 Subject: [PATCH 03/56] renamed schedule directory to (still need to update imports/references) --- loopy/{schedule => outline}/__init__.py | 0 loopy/{schedule => outline}/device_mapping.py | 0 loopy/{schedule => outline}/tools.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename loopy/{schedule => outline}/__init__.py (100%) rename loopy/{schedule => outline}/device_mapping.py (100%) rename loopy/{schedule => outline}/tools.py (100%) diff --git a/loopy/schedule/__init__.py b/loopy/outline/__init__.py similarity 
index 100% rename from loopy/schedule/__init__.py rename to loopy/outline/__init__.py diff --git a/loopy/schedule/device_mapping.py b/loopy/outline/device_mapping.py similarity index 100% rename from loopy/schedule/device_mapping.py rename to loopy/outline/device_mapping.py diff --git a/loopy/schedule/tools.py b/loopy/outline/tools.py similarity index 100% rename from loopy/schedule/tools.py rename to loopy/outline/tools.py -- GitLab From 3dbe4007f6fc3d0cf3a1c32a8ec0ff2e93c20db4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 Feb 2020 14:17:23 -0600 Subject: [PATCH 04/56] rename references to loopy.schedule->loopy.outline --- examples/python/global_barrier_removal.py | 2 +- loopy/__init__.py | 2 +- loopy/check.py | 6 +++--- loopy/codegen/__init__.py | 2 +- loopy/codegen/bounds.py | 4 ++-- loopy/codegen/control.py | 6 +++--- loopy/codegen/loop.py | 2 +- loopy/codegen/result.py | 2 +- loopy/kernel/__init__.py | 4 ++-- loopy/kernel/tools.py | 10 +++++----- loopy/loop.py | 2 +- loopy/outline/__init__.py | 6 +++--- loopy/outline/device_mapping.py | 4 ++-- loopy/outline/tools.py | 10 +++++----- loopy/statistics.py | 2 +- loopy/target/c/__init__.py | 4 ++-- loopy/target/cuda.py | 2 +- loopy/target/execution.py | 2 +- loopy/target/opencl.py | 2 +- loopy/transform/save.py | 6 +++--- test/test_loopy.py | 8 ++++---- 21 files changed, 44 insertions(+), 44 deletions(-) diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index 7ab049cd1..9e289b270 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -23,7 +23,7 @@ knl = lp.add_and_infer_dtypes(knl, from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) -from loopy.schedule import get_one_scheduled_kernel +from loopy.outline import get_one_scheduled_kernel knl = get_one_scheduled_kernel(knl) # map schedule onto host or device diff --git a/loopy/__init__.py b/loopy/__init__.py index b60de6e2d..715e26e60 
100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ from loopy.transform.add_barrier import add_barrier from loopy.type_inference import infer_unknown_types from loopy.preprocess import preprocess_kernel, realize_reduction -from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel +from loopy.outline import generate_loop_schedules, get_one_scheduled_kernel from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_poly, get_op_map, get_lmem_access_poly, get_DRAM_access_poly, get_gmem_access_poly, get_mem_access_map, diff --git a/loopy/check.py b/loopy/check.py index 494c04302..bb0c63f5b 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -682,7 +682,7 @@ def pre_schedule_checks(kernel): # {{{ check for unused hw axes def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): - from loopy.schedule import (CallKernel, RunInstruction, + from loopy.outline import (CallKernel, RunInstruction, Barrier, EnterLoop, LeaveLoop, ReturnFromKernel, get_insn_ids_for_block_at, gather_schedule_block) @@ -815,7 +815,7 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): for subkernel in get_subkernels(kernel): defined_base_storage = set() - from loopy.schedule.tools import ( + from loopy.outline.tools import ( temporaries_written_in_subkernel, temporaries_read_in_subkernel) for temporary in temporaries_written_in_subkernel(kernel, subkernel): @@ -855,7 +855,7 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): def check_that_all_insns_are_scheduled(kernel): all_schedulable_insns = set(insn.id for insn in kernel.instructions) - from loopy.schedule import sched_item_to_insn_id + from loopy.outline import sched_item_to_insn_id scheduled_insns = set( insn_id for sched_item in kernel.program_outline diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 70feac54f..ec109e6b3 100644 --- a/loopy/codegen/__init__.py +++ 
b/loopy/codegen/__init__.py @@ -385,7 +385,7 @@ def generate_code_v2(kernel): kernel = preprocess_kernel(kernel) if kernel.program_outline is None: - from loopy.schedule import get_one_scheduled_kernel + from loopy.outline import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) if kernel.state != KernelState.SCHEDULED: diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 3b44cf5d9..30d98fecf 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -56,7 +56,7 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai # {{{ on which inames may a conditional depend? def get_usable_inames_for_conditional(kernel, sched_index): - from loopy.schedule import ( + from loopy.outline import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, VectorizeTag, @@ -70,7 +70,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): for sched_item_index, sched_item in enumerate( kernel.program_outline[:sched_index]): - from loopy.schedule import CallKernel, ReturnFromKernel + from loopy.outline import CallKernel, ReturnFromKernel if isinstance(sched_item, CallKernel): within_subkernel = True subkernel_index = sched_item_index diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 6b381850b..bd8b0d9c2 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -26,7 +26,7 @@ THE SOFTWARE. 
from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl -from loopy.schedule import ( +from loopy.outline import ( EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, gather_schedule_block, generate_sub_sched_items) from loopy.diagnostic import LoopyError @@ -72,7 +72,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): if isinstance(sched_item, CallKernel): assert not codegen_state.is_generating_device_code - from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at) + from loopy.outline import (gather_schedule_block, get_insn_ids_for_block_at) _, past_end_i = gather_schedule_block(kernel.program_outline, sched_index) assert past_end_i <= codegen_state.schedule_index_end @@ -277,7 +277,7 @@ def build_loop_nest(codegen_state, schedule_index): .. attribute:: used_inames_within """ - from loopy.schedule import find_used_inames_within + from loopy.outline import find_used_inames_within from loopy.codegen.bounds import get_usable_inames_for_conditional sched_index_info_entries = [ diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 3cfb5110b..44da7dfdf 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -233,7 +233,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, LocalIndexTag, GroupIndexTag, VectorizeTag) - from loopy.schedule import get_insn_ids_for_block_at + from loopy.outline import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at( kernel.program_outline, schedule_index) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index d0ae18b59..5c185b6d5 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -281,7 +281,7 @@ def generate_host_or_device_program(codegen_state, schedule_index): from loopy.codegen.control import build_loop_nest if codegen_state.is_generating_device_code: - from loopy.schedule import CallKernel + from 
loopy.outline import CallKernel assert isinstance( codegen_state.kernel.program_outline[schedule_index], CallKernel) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index cd14cf88b..378bcc7ab 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -160,7 +160,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: program_outline - *None* or a list of :class:`loopy.schedule.ScheduleItem` + *None* or a list of :class:`loopy.outline.ScheduleItem` .. attribute:: name .. attribute:: preambles @@ -1345,7 +1345,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): lines.extend(sep) if show_labels: lines.append("SCHEDULE:") - from loopy.schedule import dump_schedule + from loopy.outline import dump_schedule lines.append(dump_schedule(kernel, kernel.program_outline)) lines.extend(sep) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 282a4348b..fdfaaf0e5 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -465,7 +465,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): if iname_cluster and not kernel.program_outline: try: - from loopy.schedule import get_one_scheduled_kernel + from loopy.outline import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) except RuntimeError as e: iname_cluster = False @@ -536,7 +536,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): lines.append("%s -> %s" % (insn_2, insn_1)) if iname_cluster: - from loopy.schedule import ( + from loopy.outline import ( EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, ReturnFromKernel) @@ -1723,13 +1723,13 @@ def get_subkernels(kernel): """Return a :class:`tuple` of the names of the subkernels in the kernel. The kernel must be scheduled. - See also :class:`loopy.schedule.CallKernel`. + See also :class:`loopy.outline.CallKernel`. 
""" from loopy.kernel import KernelState if kernel.state != KernelState.SCHEDULED: raise LoopyError("Kernel must be scheduled") - from loopy.schedule import CallKernel + from loopy.outline import CallKernel return tuple(sched_item.kernel_name for sched_item in kernel.program_outline @@ -1746,7 +1746,7 @@ def get_subkernel_to_insn_id_map(kernel): if kernel.state != KernelState.SCHEDULED: raise LoopyError("Kernel must be scheduled") - from loopy.schedule import ( + from loopy.outline import ( sched_item_to_insn_id, CallKernel, ReturnFromKernel) subkernel = None diff --git a/loopy/loop.py b/loopy/loop.py index 459246382..b940aba25 100644 --- a/loopy/loop.py +++ b/loopy/loop.py @@ -31,7 +31,7 @@ def potential_loop_nest_map(kernel): """Returns a dictionary mapping inames to other inames that *could* be nested around them. - :seealso: :func:`loopy.schedule.loop_nest_map` + :seealso: :func:`loopy.outline.loop_nest_map` """ result = {} diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 04339b5ff..2db54e0b6 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -160,7 +160,7 @@ def get_insn_ids_for_block_at(program_outline, start_idx): def find_active_inames_at(kernel, sched_index): active_inames = [] - from loopy.schedule import EnterLoop, LeaveLoop + from loopy.outline import EnterLoop, LeaveLoop for sched_item in kernel.program_outline[:sched_index]: if isinstance(sched_item, EnterLoop): active_inames.append(sched_item.iname) @@ -1981,13 +1981,13 @@ def generate_loop_schedules_inner(kernel, debug_args={}): program_outline=gen_sched, state=KernelState.SCHEDULED) - from loopy.schedule.device_mapping import \ + from loopy.outline.device_mapping import \ map_schedule_onto_host_or_device if kernel.state != KernelState.SCHEDULED: # Device mapper only gets run once. 
new_kernel = map_schedule_onto_host_or_device(new_kernel) - from loopy.schedule.tools import add_extra_args_to_schedule + from loopy.outline.tools import add_extra_args_to_schedule new_kernel = add_extra_args_to_schedule(new_kernel) yield new_kernel diff --git a/loopy/outline/device_mapping.py b/loopy/outline/device_mapping.py index 235326664..9b6662a10 100644 --- a/loopy/outline/device_mapping.py +++ b/loopy/outline/device_mapping.py @@ -23,9 +23,9 @@ THE SOFTWARE. """ from loopy.diagnostic import LoopyError -from loopy.schedule import (Barrier, CallKernel, EnterLoop, LeaveLoop, +from loopy.outline import (Barrier, CallKernel, EnterLoop, LeaveLoop, ReturnFromKernel, RunInstruction) -from loopy.schedule.tools import get_block_boundaries +from loopy.outline.tools import get_block_boundaries def map_schedule_onto_host_or_device(kernel): diff --git a/loopy/outline/tools.py b/loopy/outline/tools.py index 1ac3587e2..ebef9e59e 100644 --- a/loopy/outline/tools.py +++ b/loopy/outline/tools.py @@ -30,10 +30,10 @@ from loopy.kernel.data import AddressSpace def get_block_boundaries(program_outline): """ Return a dictionary mapping indices of - :class:`loopy.schedule.BlockBeginItem`s to - :class:`loopy.schedule.BlockEndItem`s and vice versa. + :class:`loopy.outline.BlockBeginItem`s to + :class:`loopy.outline.BlockEndItem`s and vice versa. """ - from loopy.schedule import (BeginBlockItem, EndBlockItem) + from loopy.outline import (BeginBlockItem, EndBlockItem) block_bounds = {} active_blocks = [] for idx, sched_item in enumerate(program_outline): @@ -74,11 +74,11 @@ def temporaries_written_in_subkernel(kernel, subkernel): def add_extra_args_to_schedule(kernel): """ - Fill the `extra_args` fields in all the :class:`loopy.schedule.CallKernel` + Fill the `extra_args` fields in all the :class:`loopy.outline.CallKernel` instructions in the program_outline with global temporaries. 
""" new_program_outline = [] - from loopy.schedule import CallKernel + from loopy.outline import CallKernel for sched_item in kernel.program_outline: if isinstance(sched_item, CallKernel): diff --git a/loopy/statistics.py b/loopy/statistics.py index 601c8992c..8ba319142 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1718,7 +1718,7 @@ def get_synchronization_map(knl, subgroup_size=None): "ignore_boostable_into to be set." % knl.name) from loopy.preprocess import preprocess_kernel, infer_unknown_types - from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, + from loopy.outline import (EnterLoop, LeaveLoop, Barrier, CallKernel, ReturnFromKernel, RunInstruction) from operator import mul knl = infer_unknown_types(knl, expect_completion=True) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 4e9048c10..72905f3a6 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -509,7 +509,7 @@ class CFamilyASTBuilder(ASTBuilderBase): result = [] from loopy.kernel.data import AddressSpace - from loopy.schedule import CallKernel + from loopy.outline import CallKernel # We only need to write declarations for global variables with # the first device program. `is_first_dev_prog` determines # whether this is the first device program in the schedule. @@ -601,7 +601,7 @@ class CFamilyASTBuilder(ASTBuilderBase): from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line # Getting the temporary variables that are needed for the current # sub-kernel. 
- from loopy.schedule.tools import ( + from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) subkernel = kernel.program_outline[schedule_index].kernel_name diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index fcf15ee78..331b64bf4 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -245,7 +245,7 @@ class CUDACASTBuilder(CFamilyASTBuilder): from cgen import Extern fdecl = Extern("C", fdecl) - from loopy.schedule import get_insn_ids_for_block_at + from loopy.outline import get_insn_ids_for_block_at _, local_grid_size = \ codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 6d31885ae..b462b8281 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -758,7 +758,7 @@ class KernelExecutorBase(object): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) - from loopy.schedule import get_one_scheduled_kernel + from loopy.outline import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) return kernel diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 8ca0df285..554d1dea3 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -404,7 +404,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): from cgen.opencl import CLKernel, CLRequiredWorkGroupSize fdecl = CLKernel(fdecl) - from loopy.schedule import get_insn_ids_for_block_at + from loopy.outline import get_insn_ids_for_block_at _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( codegen_state.kernel.program_outline, schedule_index)) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 784fb72f6..584d257ed 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -29,11 +29,11 @@ import six from loopy.kernel.data import auto, AddressSpace from pytools import memoize_method, Record -from 
loopy.schedule import ( +from loopy.outline import ( EnterLoop, LeaveLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier) -from loopy.schedule.tools import get_block_boundaries +from loopy.outline.tools import get_block_boundaries import logging @@ -748,7 +748,7 @@ def save_and_reload_temporaries(knl): liveness = LivenessAnalysis(knl) saver = TemporarySaver(knl) - from loopy.schedule.tools import ( + from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) for sched_idx, sched_item in enumerate(knl.program_outline): diff --git a/test/test_loopy.py b/test/test_loopy.py index 6f9af8c47..66b7a4230 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1065,7 +1065,7 @@ def test_kernel_splitting(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.schedule import get_one_scheduled_kernel + from loopy.outline import get_one_scheduled_kernel knl = get_one_scheduled_kernel(knl) # map schedule onto host or device @@ -1106,7 +1106,7 @@ def test_kernel_splitting_with_loop(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.schedule import get_one_scheduled_kernel + from loopy.outline import get_one_scheduled_kernel knl = get_one_scheduled_kernel(knl) # map schedule onto host or device @@ -1124,7 +1124,7 @@ def test_kernel_splitting_with_loop(ctx_factory): def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False): from loopy.preprocess import preprocess_kernel - from loopy.schedule import get_one_scheduled_kernel + from loopy.outline import get_one_scheduled_kernel knl = preprocess_kernel(knl) knl = get_one_scheduled_kernel(knl) @@ -2208,7 +2208,7 @@ def test_nosync_option_parsing(): def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): - from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop, + from loopy.outline import (RunInstruction, Barrier, EnterLoop, LeaveLoop, 
CallKernel, ReturnFromKernel) watch_for_barrier = False seen_barrier = False -- GitLab From 2a9a8147391b09e3837bb53b998dbe48ae97b417 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 Feb 2020 14:31:06 -0600 Subject: [PATCH 05/56] renamed get_one_scheduled_kernel->get_one_outlined_kernel --- doc/ref_transform.rst | 2 +- doc/tutorial.rst | 10 +++---- examples/python/global_barrier_removal.py | 4 +-- examples/python/ispc-stream-harness.py | 2 +- loopy/__init__.py | 4 +-- loopy/codegen/__init__.py | 4 +-- loopy/kernel/tools.py | 4 +-- loopy/outline/__init__.py | 13 +++++++-- loopy/statistics.py | 2 +- loopy/target/execution.py | 4 +-- test/test_loopy.py | 32 +++++++++++------------ test/test_target.py | 8 +++--- test/test_transform.py | 2 +- 13 files changed, 50 insertions(+), 41 deletions(-) diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index 740c5cb58..7dff68c55 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -118,7 +118,7 @@ Finishing up .. autofunction:: generate_loop_schedules -.. autofunction:: get_one_scheduled_kernel +.. autofunction:: get_one_outlined_kernel .. autofunction:: save_and_reload_temporaries diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 753b09b5d..9ff452f53 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1204,9 +1204,9 @@ Here is what happens when we try to generate code for the kernel: This happens due to the kernel splitting done by :mod:`loopy`. The splitting happens when the instruction schedule is generated. To see the schedule, we -should call :func:`loopy.get_one_scheduled_kernel`: +should call :func:`loopy.get_one_outlined_kernel`: - >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + >>> knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 @@ -1233,12 +1233,12 @@ goes for local temporaries). 
:func:`loopy.save_and_reload_temporaries` for the purpose of handling the task of saving and restoring temporary values across global barriers. This function adds instructions to the kernel without scheduling them. That means -that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to +that :func:`loopy.get_one_outlined_kernel` needs to be called one more time to put those instructions into the schedule. - >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + >>> knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) >>> knl = lp.save_and_reload_temporaries(knl) - >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added instructions + >>> knl = lp.get_one_outlined_kernel(knl) # Schedule added instructions >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index 9e289b270..a01c66b3c 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -23,8 +23,8 @@ knl = lp.add_and_infer_dtypes(knl, from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) -from loopy.outline import get_one_scheduled_kernel -knl = get_one_scheduled_kernel(knl) +from loopy.outline import get_one_outlined_kernel +knl = get_one_outlined_kernel(knl) # map schedule onto host or device print(knl) diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index fa581d426..b25784726 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -30,7 +30,7 @@ def transform(knl, vars, stream_dtype): def gen_code(knl): knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) codegen_result = lp.generate_code_v2(knl) return codegen_result.device_code() + "\n" + codegen_result.host_code() diff --git a/loopy/__init__.py 
b/loopy/__init__.py index 715e26e60..34ba239e9 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ from loopy.transform.add_barrier import add_barrier from loopy.type_inference import infer_unknown_types from loopy.preprocess import preprocess_kernel, realize_reduction -from loopy.outline import generate_loop_schedules, get_one_scheduled_kernel +from loopy.outline import generate_loop_schedules, get_one_outlined_kernel from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_poly, get_op_map, get_lmem_access_poly, get_DRAM_access_poly, get_gmem_access_poly, get_mem_access_map, @@ -248,7 +248,7 @@ __all__ = [ "infer_unknown_types", "preprocess_kernel", "realize_reduction", - "generate_loop_schedules", "get_one_scheduled_kernel", + "generate_loop_schedules", "get_one_outlined_kernel", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index ec109e6b3..290fa8062 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -385,8 +385,8 @@ def generate_code_v2(kernel): kernel = preprocess_kernel(kernel) if kernel.program_outline is None: - from loopy.outline import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) + from loopy.outline import get_one_outlined_kernel + kernel = get_one_outlined_kernel(kernel) if kernel.state != KernelState.SCHEDULED: raise LoopyError("cannot generate code for a kernel that has not been " diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index fdfaaf0e5..4fcec5b43 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -465,8 +465,8 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): if iname_cluster and not kernel.program_outline: try: - from loopy.outline import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) + from loopy.outline import 
get_one_outlined_kernel + kernel = get_one_outlined_kernel(kernel) except RuntimeError as e: iname_cluster = False from warnings import warn diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 2db54e0b6..9095e45c6 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -2021,7 +2021,7 @@ schedule_cache = WriteOncePersistentDict( key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel): +def _get_one_outlined_kernel_inner(kernel): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2035,6 +2035,15 @@ def _get_one_scheduled_kernel_inner(kernel): def get_one_scheduled_kernel(kernel): + warn_with_kernel( + kernel, "get_one_scheduled_kernel_deprecated", + "get_one_scheduled_kernel is deprecated. " + "Use get_one_outlined_kernel instead.", + DeprecationWarning) + return get_one_outlined_kernel(kernel) + + +def get_one_outlined_kernel(kernel): from loopy import CACHING_ENABLED sched_cache_key = kernel @@ -2052,7 +2061,7 @@ def get_one_scheduled_kernel(kernel): if not from_cache: with ProcessLogger(logger, "%s: program_outline" % kernel.name): with MinRecursionLimitForScheduling(kernel): - result = _get_one_scheduled_kernel_inner(kernel) + result = _get_one_outlined_kernel_inner(kernel) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) diff --git a/loopy/statistics.py b/loopy/statistics.py index 8ba319142..a78b25b33 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1723,7 +1723,7 @@ def get_synchronization_map(knl, subgroup_size=None): from operator import mul knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) iname_list = [] result = ToCountMap() diff --git a/loopy/target/execution.py 
b/loopy/target/execution.py index b462b8281..ef145fbe8 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -758,8 +758,8 @@ class KernelExecutorBase(object): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) - from loopy.outline import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) + from loopy.outline import get_one_outlined_kernel + kernel = get_one_outlined_kernel(kernel) return kernel diff --git a/test/test_loopy.py b/test/test_loopy.py index 66b7a4230..56e4e5ee3 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1065,8 +1065,8 @@ def test_kernel_splitting(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.outline import get_one_scheduled_kernel - knl = get_one_scheduled_kernel(knl) + from loopy.outline import get_one_outlined_kernel + knl = get_one_outlined_kernel(knl) # map schedule onto host or device print(knl) @@ -1106,8 +1106,8 @@ def test_kernel_splitting_with_loop(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.outline import get_one_scheduled_kernel - knl = get_one_scheduled_kernel(knl) + from loopy.outline import get_one_outlined_kernel + knl = get_one_outlined_kernel(knl) # map schedule onto host or device print(knl) @@ -1124,14 +1124,14 @@ def test_kernel_splitting_with_loop(ctx_factory): def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False): from loopy.preprocess import preprocess_kernel - from loopy.outline import get_one_scheduled_kernel + from loopy.outline import get_one_outlined_kernel knl = preprocess_kernel(knl) - knl = get_one_scheduled_kernel(knl) + knl = get_one_outlined_kernel(knl) from loopy.transform.save import save_and_reload_temporaries knl = save_and_reload_temporaries(knl) - knl = get_one_scheduled_kernel(knl) + knl = get_one_outlined_kernel(knl) if debug: print(knl) @@ -1395,7 +1395,7 @@ def 
test_save_ambiguous_storage_requirements(): knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): @@ -1752,7 +1752,7 @@ def test_missing_global_barrier(): from loopy.diagnostic import MissingBarrierError with pytest.raises(MissingBarrierError): - lp.get_one_scheduled_kernel(knl) + lp.get_one_outlined_kernel(knl) def test_index_cse(ctx_factory): @@ -1884,7 +1884,7 @@ def test_const_temp_with_initializer_not_saved(): seq_dependencies=True) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) knl = lp.save_and_reload_temporaries(knl) # This ensures no save slot was added. @@ -2089,7 +2089,7 @@ def test_unscheduled_insn_detection(): """, "...") - knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) insn1, = lp.find_instructions(knl, "id:insn1") knl.instructions.append(insn1.copy(id="insn2")) @@ -2254,7 +2254,7 @@ def test_barrier_insertion_near_top_of_loop(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.set_temporary_scope(knl, "b", "local") - knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) print(knl) @@ -2281,7 +2281,7 @@ def test_barrier_insertion_near_bottom_of_loop(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.set_temporary_scope(knl, "b", "local") - knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) print(knl) @@ -2650,7 +2650,7 @@ def test_check_for_variable_access_ordering(): from loopy.diagnostic import VariableAccessNotOrdered with pytest.raises(VariableAccessNotOrdered): - lp.get_one_scheduled_kernel(knl) + 
lp.get_one_outlined_kernel(knl) def test_check_for_variable_access_ordering_with_aliasing(): @@ -2669,7 +2669,7 @@ def test_check_for_variable_access_ordering_with_aliasing(): from loopy.diagnostic import VariableAccessNotOrdered with pytest.raises(VariableAccessNotOrdered): - lp.get_one_scheduled_kernel(knl) + lp.get_one_outlined_kernel(knl) @pytest.mark.parametrize(("second_index", "expect_barrier"), @@ -2692,7 +2692,7 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): knl = lp.tag_inames(knl, "i:l.0") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) assert barrier_between(knl, "first", "second") == expect_barrier diff --git a/test/test_target.py b/test/test_target.py index bcf85a340..03e1cb502 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -73,7 +73,7 @@ def test_ispc_target(occa_mode=False): default_tag="l.auto") codegen_result = lp.generate_code_v2( - lp.get_one_scheduled_kernel( + lp.get_one_outlined_kernel( lp.preprocess_kernel(knl))) print(codegen_result.device_code()) @@ -99,7 +99,7 @@ def test_cuda_target(): print( lp.generate_code( - lp.get_one_scheduled_kernel( + lp.get_one_outlined_kernel( lp.preprocess_kernel(knl)))[0]) @@ -142,7 +142,7 @@ def test_generate_c_snippet(): knl = lp.prioritize_loops(knl, "I,k_outer,k_inner") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) print(lp.generate_body(knl)) @@ -354,7 +354,7 @@ def test_ispc_streaming_stores(): knl = lp.set_argument_order(knl, vars + ["n"]) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) lp.generate_code_v2(knl).all_code() diff --git a/test/test_transform.py b/test/test_transform.py index 6eb6697b5..76b8d0d25 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -266,7 +266,7 @@ def test_vectorize(ctx_factory): knl = lp.tag_inames(knl, {"i_inner": 
"vec"}) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_outlined_kernel(knl) code, inf = lp.generate_code(knl) lp.auto_test_vs_ref( -- GitLab From 6ae021009c7837b5ed4fcd0953183b4a3a51ade4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 Feb 2020 14:39:24 -0600 Subject: [PATCH 06/56] renamed generate_loop_schedules->generate_loop_outlines --- doc/ref_transform.rst | 2 +- loopy/__init__.py | 4 ++-- loopy/auto_test.py | 4 ++-- loopy/outline/__init__.py | 30 +++++++++++++++--------------- proto-tests/test_fem_assembly.py | 2 +- proto-tests/test_sem.py | 12 ++++++------ proto-tests/test_sem_tim.py | 12 ++++++------ proto-tests/test_tim.py | 6 +++--- test/test_domain.py | 8 ++++---- test/test_loopy.py | 20 ++++++++++---------- 10 files changed, 50 insertions(+), 50 deletions(-) diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index 7dff68c55..7df4fb3bd 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -116,7 +116,7 @@ Finishing up .. autofunction:: preprocess_kernel -.. autofunction:: generate_loop_schedules +.. autofunction:: generate_loop_outlines .. 
autofunction:: get_one_outlined_kernel diff --git a/loopy/__init__.py b/loopy/__init__.py index 34ba239e9..33cf5e3e0 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ from loopy.transform.add_barrier import add_barrier from loopy.type_inference import infer_unknown_types from loopy.preprocess import preprocess_kernel, realize_reduction -from loopy.outline import generate_loop_schedules, get_one_outlined_kernel +from loopy.outline import generate_loop_outlines, get_one_outlined_kernel from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_poly, get_op_map, get_lmem_access_poly, get_DRAM_access_poly, get_gmem_access_poly, get_mem_access_map, @@ -248,7 +248,7 @@ __all__ = [ "infer_unknown_types", "preprocess_kernel", "realize_reduction", - "generate_loop_schedules", "get_one_outlined_kernel", + "generate_loop_outlines", "get_one_outlined_kernel", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/auto_test.py b/loopy/auto_test.py index a2cd1ff1b..0cedc980a 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -450,7 +450,7 @@ def auto_test_vs_ref( pp_ref_knl = lp.preprocess_kernel(ref_knl) - for knl in lp.generate_loop_schedules(pp_ref_knl): + for knl in lp.generate_loop_outlines(pp_ref_knl): ref_sched_kernel = knl break @@ -541,7 +541,7 @@ def auto_test_vs_ref( test_knl = lp.preprocess_kernel(test_knl) if not test_knl.program_outline: - test_kernels = lp.generate_loop_schedules(test_knl) + test_kernels = lp.generate_loop_outlines(test_knl) else: test_kernels = [test_knl] diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 9095e45c6..3fbbc7647 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -651,7 +651,7 @@ class SchedulerState(ImmutableRecord): return None -def generate_loop_schedules_internal( +def generate_loop_outlines_internal( sched_state, 
allow_boost=False, debug=None): # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. @@ -712,7 +712,7 @@ def generate_loop_schedules_internal( if isinstance(next_preoutline_item, CallKernel): assert sched_state.within_subkernel is False - for result in generate_loop_schedules_internal( + for result in generate_loop_outlines_internal( sched_state.copy( program_outline=sched_state.program_outline + ( next_preoutline_item,), @@ -728,7 +728,7 @@ def generate_loop_schedules_internal( assert sched_state.within_subkernel is True # Make sure all subkernel inames have finished. if sched_state.active_inames == sched_state.enclosing_subkernel_inames: - for result in generate_loop_schedules_internal( + for result in generate_loop_outlines_internal( sched_state.copy( program_outline=sched_state.program_outline + ( next_preoutline_item,), @@ -750,7 +750,7 @@ def generate_loop_schedules_internal( if ( isinstance(next_preoutline_item, Barrier) and next_preoutline_item.originating_insn_id is None): - for result in generate_loop_schedules_internal( + for result in generate_loop_outlines_internal( sched_state.copy( program_outline=sched_state.program_outline + ( next_preoutline_item,), @@ -949,7 +949,7 @@ def generate_loop_schedules_internal( # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be # made. 
- for sub_sched in generate_loop_schedules_internal( + for sub_sched in generate_loop_outlines_internal( new_sched_state, allow_boost=rec_allow_boost, debug=debug): yield sub_sched @@ -1046,7 +1046,7 @@ def generate_loop_schedules_internal( if can_leave and not debug_mode: - for sub_sched in generate_loop_schedules_internal( + for sub_sched in generate_loop_outlines_internal( sched_state.copy( program_outline=( sched_state.program_outline @@ -1258,7 +1258,7 @@ def generate_loop_schedules_internal( iname), reverse=True): - for sub_sched in generate_loop_schedules_internal( + for sub_sched in generate_loop_outlines_internal( sched_state.copy( program_outline=( sched_state.program_outline @@ -1312,7 +1312,7 @@ def generate_loop_schedules_internal( else: if not allow_boost and allow_boost is not None: # try again with boosting allowed - for sub_sched in generate_loop_schedules_internal( + for sub_sched in generate_loop_outlines_internal( sched_state, allow_boost=True, debug=debug): yield sub_sched @@ -1827,7 +1827,7 @@ class MinRecursionLimitForScheduling(MinRecursionLimit): # {{{ main scheduling entrypoint -def generate_loop_schedules(kernel, debug_args={}): +def generate_loop_outlines(kernel, debug_args={}): """ .. 
warning:: @@ -1840,11 +1840,11 @@ def generate_loop_schedules(kernel, debug_args={}): """ with MinRecursionLimitForScheduling(kernel): - for sched in generate_loop_schedules_inner(kernel, debug_args=debug_args): + for sched in generate_loop_outlines_inner(kernel, debug_args=debug_args): yield sched -def generate_loop_schedules_inner(kernel, debug_args={}): +def generate_loop_outlines_inner(kernel, debug_args={}): from loopy.kernel import KernelState if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): raise LoopyError("cannot schedule a kernel that has not been " @@ -1937,7 +1937,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): print() print("To disable this interactive behavior, pass") print(" debug_args=dict(interactive=False)") - print("to generate_loop_schedules().") + print("to generate_loop_outlines().") print(75*"-") six.moves.input("Enter:") print() @@ -1946,7 +1946,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug.debug_length = len(debug.longest_rejected_schedule) while True: try: - for _ in generate_loop_schedules_internal( + for _ in generate_loop_outlines_internal( sched_state, debug=debug, **schedule_gen_kwargs): pass @@ -1957,7 +1957,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): break try: - for gen_sched in generate_loop_schedules_internal( + for gen_sched in generate_loop_outlines_internal( sched_state, debug=debug, **schedule_gen_kwargs): debug.stop() @@ -2031,7 +2031,7 @@ def _get_one_outlined_kernel_inner(kernel): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. 
- return next(iter(generate_loop_schedules(kernel))) + return next(iter(generate_loop_outlines(kernel))) def get_one_scheduled_kernel(kernel): diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index 18f2a5bfa..55abe2135 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -123,7 +123,7 @@ def test_laplacian_stiffness(ctx_factory): # v for variant in [variant_fig33]: var_knl, loop_prio = variant(knl) - kernel_gen = lp.generate_loop_schedules(var_knl, + kernel_gen = lp.generate_loop_outlines(var_knl, loop_priority=loop_prio) kernel_gen = lp.check_kernels(kernel_gen, dict(Nc=Nc)) diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index 4613b74ae..4774a8685 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -99,7 +99,7 @@ def test_laplacian(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl, + kernel_gen = lp.generate_loop_outlines(knl, loop_priority=["m_fetch_G", "i_fetch_u"]) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) @@ -179,7 +179,7 @@ def test_laplacian_lmem(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) K = 1000 @@ -256,7 +256,7 @@ def test_laplacian_lmem_ilp(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) for knl in kernel_gen: @@ -347,7 +347,7 @@ def test_advect(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) @@ -467,7 +467,7 @@ def test_advect_dealias(ctx_factory): print(knl) #1/0 - kernel_gen 
= lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) @@ -531,7 +531,7 @@ def test_interp_diff(ctx_factory): print(knl) #1/0 - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 1bfb437fb..0fbba945b 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -101,7 +101,7 @@ def test_laplacian(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl, + kernel_gen = lp.generate_loop_outlines(knl, loop_priority=["m_fetch_G", "i_fetch_u"]) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) @@ -191,7 +191,7 @@ def test_laplacian_lmem(ctx_factory): # ValueError: cannot tag 'i_and_j'--not known # knl = lp.tag_inames(knl, dict(i_and_j="l.0", k="l.1")) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) K = 1000 @@ -262,7 +262,7 @@ def test_laplacian_lmem_ilp(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) for knl in kernel_gen: @@ -353,7 +353,7 @@ def test_advect(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) @@ -473,7 +473,7 @@ def test_advect_dealias(ctx_factory): print(knl) #1/0 - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), 
kill_level_min=5) @@ -537,7 +537,7 @@ def test_interp_diff(ctx_factory): print(knl) #1/0 - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index d7061933e..7c681fa77 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -63,7 +63,7 @@ def test_tim2d(ctx_factory): # knl = lp.add_prefetch(knl, "G", [2,3], default_tag=None) # axis/argument indices on G knl = lp.add_prefetch(knl, "G", [2,3], default_tag="l.auto") # axis/argument indices on G - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) K = 1000 @@ -129,7 +129,7 @@ def test_red2d(ctx_factory): knl = lp.add_prefetch(knl, "G", [2,3], default_tag="l.auto") # axis/argument indices on G - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) K = 1000 @@ -201,7 +201,7 @@ def test_tim3d(ctx_factory): knl = lp.add_prefetch(knl, "G", [2,3,4], default_tag="l.auto") # axis/argument indices on G - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) K = 4000 diff --git a/test/test_domain.py b/test/test_domain.py index ebfde8509..9f07a64e3 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -68,7 +68,7 @@ def test_assume(ctx_factory): knl = lp.assume(knl, "n mod 16 = 0") knl = lp.assume(knl, "n > 10") knl = lp.preprocess_kernel(knl, ctx.devices[0]) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) for gen_knl in kernel_gen: print(gen_knl) @@ -97,7 +97,7 @@ def test_divisibility_assumption(ctx_factory): knl = lp.split_iname(knl, "i", 16) knl = 
lp.preprocess_kernel(knl, ctx.devices[0]) - for k in lp.generate_loop_schedules(knl): + for k in lp.generate_loop_outlines(knl): code = lp.generate_code(k) assert "if" not in code @@ -124,7 +124,7 @@ def test_eq_constraint(ctx_factory): knl = lp.split_iname(knl, "i_inner", 16, outer_tag=None, inner_tag="l.0") knl = lp.preprocess_kernel(knl, ctx.devices[0]) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) for knl in kernel_gen: print(lp.generate_code(knl)) @@ -229,7 +229,7 @@ def test_dependent_loop_bounds_3(ctx_factory): knl = lp.preprocess_kernel(knl, ctx.devices[0]) with pytest.raises(RuntimeError): - list(lp.generate_loop_schedules(knl_bad)) + list(lp.generate_loop_outlines(knl_bad)) def test_dependent_loop_bounds_4(): diff --git a/test/test_loopy.py b/test/test_loopy.py index 56e4e5ee3..08ae85222 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -123,7 +123,7 @@ def test_type_inference_no_artificial_doubles(): assumptions="n>=1") knl = lp.preprocess_kernel(knl) - for k in lp.generate_loop_schedules(knl): + for k in lp.generate_loop_outlines(knl): code = lp.generate_code(k) assert "double" not in code @@ -185,7 +185,7 @@ def test_simple_side_effect(ctx_factory): ) knl = lp.preprocess_kernel(knl) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) for gen_knl in kernel_gen: print(gen_knl) @@ -207,7 +207,7 @@ def test_owed_barriers(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.preprocess_kernel(knl) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) for gen_knl in kernel_gen: compiled = lp.CompiledKernel(ctx, gen_knl) @@ -228,7 +228,7 @@ def test_wg_too_small(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.preprocess_kernel(knl) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) import pytest for gen_knl in kernel_gen: @@ -251,7 +251,7 @@ def 
test_multi_cse(ctx_factory): knl = lp.add_prefetch(knl, "a", []) knl = lp.preprocess_kernel(knl) - kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.generate_loop_outlines(knl) for gen_knl in kernel_gen: compiled = lp.CompiledKernel(ctx, gen_knl) @@ -305,7 +305,7 @@ def test_ilp_write_race_detection_global(): from loopy.diagnostic import WriteRaceConditionWarning from warnings import catch_warnings with catch_warnings(record=True) as warn_list: - list(lp.generate_loop_schedules(knl)) + list(lp.generate_loop_outlines(knl)) assert any(isinstance(w.message, WriteRaceConditionWarning) for w in warn_list) @@ -322,7 +322,7 @@ def test_ilp_write_race_avoidance_local(): knl = lp.tag_inames(knl, dict(i="l.0", j="ilp")) knl = lp.preprocess_kernel(knl) - for k in lp.generate_loop_schedules(knl): + for k in lp.generate_loop_outlines(knl): assert k.temporary_variables["a"].shape == (16, 17) @@ -337,7 +337,7 @@ def test_ilp_write_race_avoidance_private(): knl = lp.tag_inames(knl, dict(j="ilp")) knl = lp.preprocess_kernel(knl) - for k in lp.generate_loop_schedules(knl): + for k in lp.generate_loop_outlines(knl): assert k.temporary_variables["a"].shape == (16,) # }}} @@ -777,7 +777,7 @@ def test_multiple_writes_to_local_temporary(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.preprocess_kernel(knl) - for k in lp.generate_loop_schedules(knl): + for k in lp.generate_loop_outlines(knl): code, _ = lp.generate_code(k) print(code) @@ -860,7 +860,7 @@ def test_variable_size_temporary(): # Make sure that code generation succeeds even if # there are variable-length arrays. 
knl = lp.preprocess_kernel(knl) - for k in lp.generate_loop_schedules(knl): + for k in lp.generate_loop_outlines(knl): lp.generate_code(k) -- GitLab From 3459805062eeb415e3d172cac6195034841c8d06 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 05:57:11 -0600 Subject: [PATCH 07/56] rename program_outline->outline --- loopy/auto_test.py | 2 +- loopy/check.py | 16 ++-- loopy/codegen/__init__.py | 4 +- loopy/codegen/bounds.py | 4 +- loopy/codegen/control.py | 14 +-- loopy/codegen/loop.py | 8 +- loopy/codegen/result.py | 2 +- loopy/kernel/__init__.py | 14 +-- loopy/kernel/tools.py | 8 +- loopy/outline/__init__.py | 162 ++++++++++++++++---------------- loopy/outline/device_mapping.py | 66 ++++++------- loopy/outline/tools.py | 16 ++-- loopy/statistics.py | 2 +- loopy/target/c/__init__.py | 4 +- loopy/target/cuda.py | 2 +- loopy/target/execution.py | 2 +- loopy/target/opencl.py | 2 +- loopy/transform/save.py | 36 +++---- test/test_loopy.py | 2 +- 19 files changed, 183 insertions(+), 183 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 0cedc980a..248cd5bb0 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -540,7 +540,7 @@ def auto_test_vs_ref( test_knl = lp.preprocess_kernel(test_knl) - if not test_knl.program_outline: + if not test_knl.outline: test_kernels = lp.generate_loop_outlines(test_knl) else: test_kernels = [test_knl] diff --git a/loopy/check.py b/loopy/check.py index bb0c63f5b..dd3620647 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -691,18 +691,18 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): local_axes = set() i = 0 - loop_end_i = past_end_i = len(kernel.program_outline) + loop_end_i = past_end_i = len(kernel.outline) else: - assert isinstance(kernel.program_outline[sched_index], CallKernel) - _, past_end_i = gather_schedule_block(kernel.program_outline, sched_index) + assert isinstance(kernel.outline[sched_index], CallKernel) + _, past_end_i = 
gather_schedule_block(kernel.outline, sched_index) group_size, local_size = kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at(kernel.program_outline, sched_index)) + get_insn_ids_for_block_at(kernel.outline, sched_index)) group_axes = set(ax for ax, length in enumerate(group_size)) local_axes = set(ax for ax, length in enumerate(local_size)) i = sched_index + 1 - assert isinstance(kernel.program_outline[past_end_i - 1], ReturnFromKernel) + assert isinstance(kernel.outline[past_end_i - 1], ReturnFromKernel) loop_end_i = past_end_i - 1 # alternative: just disregard length-1 dimensions? @@ -711,7 +711,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): GroupIndexTag) while i < loop_end_i: - sched_item = kernel.program_outline[i] + sched_item = kernel.outline[i] if isinstance(sched_item, CallKernel): i = _check_for_unused_hw_axes_in_kernel_chunk(kernel, i) @@ -765,7 +765,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): def check_for_unused_hw_axes_in_insns(kernel): - if kernel.program_outline: + if kernel.outline: _check_for_unused_hw_axes_in_kernel_chunk(kernel) # }}} @@ -858,7 +858,7 @@ def check_that_all_insns_are_scheduled(kernel): from loopy.outline import sched_item_to_insn_id scheduled_insns = set( insn_id - for sched_item in kernel.program_outline + for sched_item in kernel.outline for insn_id in sched_item_to_insn_id(sched_item)) assert scheduled_insns <= all_schedulable_insns diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 290fa8062..360b77e43 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -384,7 +384,7 @@ def generate_code_v2(kernel): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) - if kernel.program_outline is None: + if kernel.outline is None: from loopy.outline import get_one_outlined_kernel kernel = get_one_outlined_kernel(kernel) @@ -470,7 +470,7 @@ def generate_code_v2(kernel): 
kernel.target.host_program_name_prefix + kernel.name + kernel.target.host_program_name_suffix), - schedule_index_end=len(kernel.program_outline)) + schedule_index_end=len(kernel.outline)) from loopy.codegen.result import generate_host_or_device_program codegen_result = generate_host_or_device_program( diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 30d98fecf..d364948cd 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -69,7 +69,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): within_subkernel = False for sched_item_index, sched_item in enumerate( - kernel.program_outline[:sched_index]): + kernel.outline[:sched_index]): from loopy.outline import CallKernel, ReturnFromKernel if isinstance(sched_item, CallKernel): within_subkernel = True @@ -82,7 +82,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): return frozenset(result) insn_ids_for_subkernel = get_insn_ids_for_block_at( - kernel.program_outline, subkernel_index) + kernel.outline, subkernel_index) inames_for_subkernel = ( iname diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index bd8b0d9c2..daa02c81c 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -36,7 +36,7 @@ def synthesize_idis_for_extra_args(kernel, schedule_index): """ :returns: A list of :class:`loopy.codegen.ImplementedDataInfo` """ - sched_item = kernel.program_outline[schedule_index] + sched_item = kernel.outline[schedule_index] from loopy.codegen import ImplementedDataInfo from loopy.kernel.data import InameArg, AddressSpace @@ -67,13 +67,13 @@ def synthesize_idis_for_extra_args(kernel, schedule_index): def generate_code_for_sched_index(codegen_state, sched_index): kernel = codegen_state.kernel - sched_item = kernel.program_outline[sched_index] + sched_item = kernel.outline[sched_index] if isinstance(sched_item, CallKernel): assert not codegen_state.is_generating_device_code from loopy.outline import (gather_schedule_block, 
get_insn_ids_for_block_at) - _, past_end_i = gather_schedule_block(kernel.program_outline, sched_index) + _, past_end_i = gather_schedule_block(kernel.outline, sched_index) assert past_end_i <= codegen_state.schedule_index_end extra_args = synthesize_idis_for_extra_args(kernel, sched_index) @@ -90,7 +90,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): new_codegen_state, sched_index) glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at(kernel.program_outline, sched_index)) + get_insn_ids_for_block_at(kernel.outline, sched_index)) return merge_codegen_results(codegen_state, [ codegen_result, @@ -178,7 +178,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): def get_required_predicates(kernel, sched_index): result = None for _, sched_item in generate_sub_sched_items( - kernel.program_outline, sched_index): + kernel.outline, sched_index): if isinstance(sched_item, Barrier): my_preds = frozenset() elif isinstance(sched_item, RunInstruction): @@ -240,7 +240,7 @@ def build_loop_nest(codegen_state, schedule_index): i = schedule_index while i < codegen_state.schedule_index_end: - sched_item = kernel.program_outline[i] + sched_item = kernel.outline[i] if isinstance(sched_item, LeaveLoop): break @@ -248,7 +248,7 @@ def build_loop_nest(codegen_state, schedule_index): my_sched_indices.append(i) if isinstance(sched_item, (EnterLoop, CallKernel)): - _, i = gather_schedule_block(kernel.program_outline, i) + _, i = gather_schedule_block(kernel.outline, i) assert i <= codegen_state.schedule_index_end, \ "schedule block extends beyond schedule_index_end" diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 44da7dfdf..8f2a51ea5 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -119,7 +119,7 @@ def get_slab_decomposition(kernel, iname): def generate_unroll_loop(codegen_state, sched_index): kernel = codegen_state.kernel - iname = kernel.program_outline[sched_index].iname + iname = 
kernel.outline[sched_index].iname bounds = kernel.get_iname_bounds(iname, constants_only=True) @@ -161,7 +161,7 @@ def generate_unroll_loop(codegen_state, sched_index): def generate_vectorize_loop(codegen_state, sched_index): kernel = codegen_state.kernel - iname = kernel.program_outline[sched_index].iname + iname = kernel.outline[sched_index].iname bounds = kernel.get_iname_bounds(iname, constants_only=True) @@ -235,7 +235,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.outline import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at( - kernel.program_outline, schedule_index) + kernel.outline, schedule_index) if hw_inames_left is None: all_inames_by_insns = set() @@ -347,7 +347,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper - loop_iname = kernel.program_outline[sched_index].iname + loop_iname = kernel.outline[sched_index].iname slabs = get_slab_decomposition(kernel, loop_iname) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 5c185b6d5..1c5957327 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -283,7 +283,7 @@ def generate_host_or_device_program(codegen_state, schedule_index): if codegen_state.is_generating_device_code: from loopy.outline import CallKernel assert isinstance( - codegen_state.kernel.program_outline[schedule_index], CallKernel) + codegen_state.kernel.outline[schedule_index], CallKernel) from loopy.codegen.loop import set_up_hw_parallel_loops codegen_result = set_up_hw_parallel_loops( diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 378bcc7ab..2c864d6f8 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -158,7 +158,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): A list of :class:`loopy.KernelArgument` - .. attribute:: program_outline + .. 
attribute:: outline *None* or a list of :class:`loopy.outline.ScheduleItem` @@ -227,7 +227,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ constructor - def __init__(self, domains, instructions, args=None, program_outline=None, + def __init__(self, domains, instructions, args=None, outline=None, name="loopy_kernel", preambles=None, preamble_generators=None, @@ -351,7 +351,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): domains=domains, instructions=instructions, args=args, - program_outline=program_outline, + outline=outline, name=name, preambles=preambles, preamble_generators=preamble_generators, @@ -1239,7 +1239,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "rules", "instructions", "Dependencies", - "program_outline", + "outline", ]) first_letter_to_what = dict( @@ -1341,12 +1341,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): "(use loopy.show_dependency_graph to visualize)") lines.extend(dep_lines) - if "program_outline" in what and kernel.program_outline is not None: + if "outline" in what and kernel.outline is not None: lines.extend(sep) if show_labels: lines.append("SCHEDULE:") from loopy.outline import dump_schedule - lines.append(dump_schedule(kernel, kernel.program_outline)) + lines.append(dump_schedule(kernel, kernel.outline)) lines.extend(sep) @@ -1473,7 +1473,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "domains", "instructions", "args", - "program_outline", + "outline", "name", "preambles", "assumptions", diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 4fcec5b43..77fe38c0c 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -463,7 +463,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): from loopy.kernel.creation import apply_single_writer_depencency_heuristic kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=False) - if iname_cluster and not kernel.program_outline: + if iname_cluster and not kernel.outline: try: from 
loopy.outline import get_one_outlined_kernel kernel = get_one_outlined_kernel(kernel) @@ -540,7 +540,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, ReturnFromKernel) - for sched_item in kernel.program_outline: + for sched_item in kernel.outline: if isinstance(sched_item, EnterLoop): lines.append("subgraph cluster_%s { label=\"%s\"" % (sched_item.iname, sched_item.iname)) @@ -1732,7 +1732,7 @@ def get_subkernels(kernel): from loopy.outline import CallKernel return tuple(sched_item.kernel_name - for sched_item in kernel.program_outline + for sched_item in kernel.outline if isinstance(sched_item, CallKernel)) @@ -1752,7 +1752,7 @@ def get_subkernel_to_insn_id_map(kernel): subkernel = None result = {} - for sched_item in kernel.program_outline: + for sched_item in kernel.outline: if isinstance(sched_item, CallKernel): subkernel = sched_item.kernel_name result[subkernel] = set() diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 3fbbc7647..377971c24 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -105,33 +105,33 @@ class Barrier(ScheduleItem): # {{{ schedule utilities -def gather_schedule_block(program_outline, start_idx): - assert isinstance(program_outline[start_idx], BeginBlockItem) +def gather_schedule_block(outline, start_idx): + assert isinstance(outline[start_idx], BeginBlockItem) level = 0 i = start_idx - while i < len(program_outline): - if isinstance(program_outline[i], BeginBlockItem): + while i < len(outline): + if isinstance(outline[i], BeginBlockItem): level += 1 - elif isinstance(program_outline[i], EndBlockItem): + elif isinstance(outline[i], EndBlockItem): level -= 1 if level == 0: - return program_outline[start_idx:i+1], i+1 + return outline[start_idx:i+1], i+1 i += 1 assert False -def generate_sub_sched_items(program_outline, start_idx): - if not isinstance(program_outline[start_idx], BeginBlockItem): - yield 
start_idx, program_outline[start_idx] +def generate_sub_sched_items(outline, start_idx): + if not isinstance(outline[start_idx], BeginBlockItem): + yield start_idx, outline[start_idx] level = 0 i = start_idx - while i < len(program_outline): - sched_item = program_outline[i] + while i < len(outline): + sched_item = outline[i] if isinstance(sched_item, BeginBlockItem): level += 1 @@ -149,11 +149,11 @@ def generate_sub_sched_items(program_outline, start_idx): assert False -def get_insn_ids_for_block_at(program_outline, start_idx): +def get_insn_ids_for_block_at(outline, start_idx): return frozenset( sub_sched_item.insn_id for i, sub_sched_item in generate_sub_sched_items( - program_outline, start_idx) + outline, start_idx) if isinstance(sub_sched_item, RunInstruction)) @@ -161,7 +161,7 @@ def find_active_inames_at(kernel, sched_index): active_inames = [] from loopy.outline import EnterLoop, LeaveLoop - for sched_item in kernel.program_outline[:sched_index]: + for sched_item in kernel.outline[:sched_index]: if isinstance(sched_item, EnterLoop): active_inames.append(sched_item.iname) if isinstance(sched_item, LeaveLoop): @@ -171,11 +171,11 @@ def find_active_inames_at(kernel, sched_index): def has_barrier_within(kernel, sched_index): - sched_item = kernel.program_outline[sched_index] + sched_item = kernel.outline[sched_index] if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( - kernel.program_outline, sched_index) + kernel.outline, sched_index) from pytools import any return any(isinstance(subsched_item, Barrier) for subsched_item in loop_contents) @@ -186,11 +186,11 @@ def has_barrier_within(kernel, sched_index): def find_used_inames_within(kernel, sched_index): - sched_item = kernel.program_outline[sched_index] + sched_item = kernel.outline[sched_index] if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( - kernel.program_outline, sched_index) + kernel.outline, sched_index) run_insns = [subsched_item 
for subsched_item in loop_contents if isinstance(subsched_item, RunInstruction)] @@ -456,12 +456,12 @@ def format_insn(kernel, insn_id): Fore.CYAN, str(insn), Style.RESET_ALL) -def dump_schedule(kernel, program_outline): +def dump_schedule(kernel, outline): lines = [] indent = "" from loopy.kernel.data import MultiAssignmentBase - for sched_item in program_outline: + for sched_item in outline: if isinstance(sched_item, EnterLoop): lines.append(indent + "for %s" % sched_item.iname) indent += " " @@ -523,13 +523,13 @@ class ScheduleDebugger: sys.stdout.flush() self.wrote_status = 2 - def log_success(self, program_outline): + def log_success(self, outline): self.success_counter += 1 self.update() - def log_dead_end(self, program_outline): - if len(program_outline) > len(self.longest_rejected_schedule): - self.longest_rejected_schedule = program_outline + def log_dead_end(self, outline): + if len(outline) > len(self.longest_rejected_schedule): + self.longest_rejected_schedule = outline self.dead_end_counter += 1 self.update() @@ -598,7 +598,7 @@ class SchedulerState(ImmutableRecord): The inames of the last entered subkernel - .. attribute:: program_outline + .. attribute:: outline .. attribute:: scheduled_insn_ids @@ -607,7 +607,7 @@ class SchedulerState(ImmutableRecord): .. attribute:: preoutline A sequence of schedule items that must be inserted into the - program_outline, maintaining the same relative ordering. Newly scheduled + outline, maintaining the same relative ordering. Newly scheduled items may interleave this sequence. .. 
attribute:: preoutlined_insn_ids @@ -677,7 +677,7 @@ def generate_loop_outlines_internal( if debug is not None: if (debug.debug_length is not None - and len(sched_state.program_outline) >= debug.debug_length): + and len(sched_state.outline) >= debug.debug_length): debug_mode = True if debug_mode: @@ -688,7 +688,7 @@ def generate_loop_outlines_internal( print(kernel.stringify(with_dependencies=True)) print(75*"=") print("CURRENT SCHEDULE:") - print(dump_schedule(sched_state.kernel, sched_state.program_outline)) + print(dump_schedule(sched_state.kernel, sched_state.outline)) if sched_state.preoutline: print(75*"=") print("PREOUTLINED ITEMS AWAITING SCHEDULING:") @@ -703,7 +703,7 @@ def generate_loop_outlines_internal( if debug.debug_length == len(debug.longest_rejected_schedule): print("WHY IS THIS A DEAD-END SCHEDULE?") - #if len(program_outline) == 2: + #if len(outline) == 2: #from pudb import set_trace; set_trace() # }}} @@ -714,7 +714,7 @@ def generate_loop_outlines_internal( assert sched_state.within_subkernel is False for result in generate_loop_outlines_internal( sched_state.copy( - program_outline=sched_state.program_outline + ( + outline=sched_state.outline + ( next_preoutline_item,), preoutline=sched_state.preoutline[1:], within_subkernel=True, @@ -730,7 +730,7 @@ def generate_loop_outlines_internal( if sched_state.active_inames == sched_state.enclosing_subkernel_inames: for result in generate_loop_outlines_internal( sched_state.copy( - program_outline=sched_state.program_outline + ( + outline=sched_state.outline + ( next_preoutline_item,), preoutline=sched_state.preoutline[1:], within_subkernel=False, @@ -752,7 +752,7 @@ def generate_loop_outlines_internal( and next_preoutline_item.originating_insn_id is None): for result in generate_loop_outlines_internal( sched_state.copy( - program_outline=sched_state.program_outline + ( + outline=sched_state.outline + ( next_preoutline_item,), preoutline=sched_state.preoutline[1:]), allow_boost=rec_allow_boost, @@ 
-933,8 +933,8 @@ def generate_loop_outlines_internal( scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, - program_outline=( - sched_state.program_outline + ( + outline=( + sched_state.outline + ( RunInstruction(insn_id=insn.id),)), preoutline=( sched_state.preoutline @@ -1001,7 +1001,7 @@ def generate_loop_outlines_internal( last_entered_loop not in subdep.boostable_into): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " - "which the program_outline is " + "which the outline is " "currently stuck inside of, seems mis-nested. " "'%(subdep)s' must occur " "before '%(dep)s', " "but '%(subdep)s must be outside " @@ -1030,7 +1030,7 @@ def generate_loop_outlines_internal( seen_an_insn = False ignore_count = 0 - for sched_item in sched_state.program_outline[::-1]: + for sched_item in sched_state.outline[::-1]: if isinstance(sched_item, RunInstruction): seen_an_insn = True elif isinstance(sched_item, LeaveLoop): @@ -1048,8 +1048,8 @@ def generate_loop_outlines_internal( for sub_sched in generate_loop_outlines_internal( sched_state.copy( - program_outline=( - sched_state.program_outline + outline=( + sched_state.outline + (LeaveLoop(iname=last_entered_loop),)), active_inames=sched_state.active_inames[:-1], preoutline=( @@ -1192,7 +1192,7 @@ def generate_loop_outlines_internal( # {{{ tier building - # Build priority tiers. If a program_outline is found in the first tier, then + # Build priority tiers. If a outline is found in the first tier, then # loops in the second are not even tried (and so on). loop_priority_set = set().union(*[set(prio) for prio in @@ -1254,14 +1254,14 @@ def generate_loop_outlines_internal( key=lambda iname: ( iname_to_usefulness.get(iname, 0), # Sort by iname to achieve deterministic - # ordering of generated program_outlines. + # ordering of generated outlines. 
iname), reverse=True): for sub_sched in generate_loop_outlines_internal( sched_state.copy( - program_outline=( - sched_state.program_outline + outline=( + sched_state.outline + (EnterLoop(iname=iname),)), active_inames=( sched_state.active_inames + (iname,)), @@ -1285,8 +1285,8 @@ def generate_loop_outlines_internal( if debug_mode: print(75*"=") - inp = six.moves.input("Hit Enter for next program_outline, " - "or enter a number to examine program_outlines of a " + inp = six.moves.input("Hit Enter for next outline, " + "or enter a number to examine outlines of a " "different length:") if inp: raise ScheduleDebugInput(inp) @@ -1296,7 +1296,7 @@ def generate_loop_outlines_internal( and not sched_state.unscheduled_insn_ids and not sched_state.preoutline): # if done, yield result - debug.log_success(sched_state.program_outline) + debug.log_success(sched_state.outline) for boost_insn_id, boost_inames in sched_state.uses_of_boostability: warn_with_kernel( @@ -1307,7 +1307,7 @@ def generate_loop_outlines_internal( % (boost_insn_id, ", ".join(boost_inames)), DeprecationWarning) - yield sched_state.program_outline + yield sched_state.outline else: if not allow_boost and allow_boost is not None: @@ -1319,18 +1319,18 @@ def generate_loop_outlines_internal( else: # dead end if debug is not None: - debug.log_dead_end(sched_state.program_outline) + debug.log_dead_end(sched_state.outline) # }}} # {{{ convert barrier instructions to proper barriers -def convert_barrier_instructions_to_barriers(kernel, program_outline): +def convert_barrier_instructions_to_barriers(kernel, outline): from loopy.kernel.instruction import BarrierInstruction result = [] - for sched_item in program_outline: + for sched_item in outline: if isinstance(sched_item, RunInstruction): insn = kernel.id_to_insn[sched_item.insn_id] if isinstance(insn, BarrierInstruction): @@ -1589,17 +1589,17 @@ def barrier_kind_more_or_equally_global(kind1, kind2): return (kind1 == kind2) or (kind1 == "global" and kind2 == 
"local") -def insn_ids_reaching_end_without_intervening_barrier(program_outline, kind): - return _insn_ids_reaching_end(program_outline, kind, reverse=False) +def insn_ids_reaching_end_without_intervening_barrier(outline, kind): + return _insn_ids_reaching_end(outline, kind, reverse=False) -def insn_ids_reachable_from_start_without_intervening_barrier(program_outline, kind): - return _insn_ids_reaching_end(program_outline, kind, reverse=True) +def insn_ids_reachable_from_start_without_intervening_barrier(outline, kind): + return _insn_ids_reaching_end(outline, kind, reverse=True) -def _insn_ids_reaching_end(program_outline, kind, reverse): +def _insn_ids_reaching_end(outline, kind, reverse): if reverse: - program_outline = reversed(program_outline) + outline = reversed(outline) enter_scope_item_kind = LeaveLoop leave_scope_item_kind = EnterLoop else: @@ -1608,7 +1608,7 @@ def _insn_ids_reaching_end(program_outline, kind, reverse): insn_ids_alive_at_scope = [set()] - for sched_item in program_outline: + for sched_item in outline: if isinstance(sched_item, enter_scope_item_kind): insn_ids_alive_at_scope.append(set()) elif isinstance(sched_item, leave_scope_item_kind): @@ -1648,7 +1648,7 @@ def _insn_ids_reaching_end(program_outline, kind, reverse): return insn_ids_alive_at_scope[-1] -def append_barrier_or_raise_error(program_outline, dep, verify_only): +def append_barrier_or_raise_error(outline, dep, verify_only): if verify_only: from loopy.diagnostic import MissingBarrierError raise MissingBarrierError( @@ -1666,7 +1666,7 @@ def append_barrier_or_raise_error(program_outline, dep, verify_only): comment = "for %s (%s)" % ( dep.variable, dep.dep_descr.format( tgt=dep.target.id, src=dep.source.id)) - program_outline.append(Barrier( + outline.append(Barrier( comment=comment, synchronization_kind=dep.var_kind, mem_kind=dep.var_kind, @@ -1674,7 +1674,7 @@ def append_barrier_or_raise_error(program_outline, dep, verify_only): def insert_barriers( - kernel, program_outline, 
synchronization_kind, verify_only, level=0): + kernel, outline, synchronization_kind, verify_only, level=0): """ :arg synchronization_kind: "local" or "global". The :attr:`Barrier.synchronization_kind` to be inserted. Generally, this @@ -1687,26 +1687,26 @@ def insert_barriers( # {{{ insert barriers at outermost scheduling level - def insert_barriers_at_outer_level(program_outline, reverse=False): + def insert_barriers_at_outer_level(outline, reverse=False): dep_tracker = DependencyTracker(kernel, var_kind=synchronization_kind, reverse=reverse) if reverse: # Populate the dependency tracker with sources from the tail end of - # the program_outline block. + # the outline block. for insn_id in ( insn_ids_reaching_end_without_intervening_barrier( - program_outline, synchronization_kind)): + outline, synchronization_kind)): dep_tracker.add_source(insn_id) result = [] i = 0 - while i < len(program_outline): - sched_item = program_outline[i] + while i < len(outline): + sched_item = outline[i] if isinstance(sched_item, EnterLoop): - subloop, new_i = gather_schedule_block(program_outline, i) + subloop, new_i = gather_schedule_block(outline, i) loop_head = ( insn_ids_reachable_from_start_without_intervening_barrier( @@ -1773,7 +1773,7 @@ def insert_barriers( i += 1 else: - raise ValueError("unexpected program_outline item type '%s'" + raise ValueError("unexpected outline item type '%s'" % type(sched_item).__name__) return result @@ -1784,11 +1784,11 @@ def insert_barriers( result = [] i = 0 - while i < len(program_outline): - sched_item = program_outline[i] + while i < len(outline): + sched_item = outline[i] if isinstance(sched_item, EnterLoop): - subloop, new_i = gather_schedule_block(program_outline, i) + subloop, new_i = gather_schedule_block(outline, i) new_subloop = insert_barriers( kernel, subloop[1:-1], synchronization_kind, verify_only, level + 1) @@ -1803,7 +1803,7 @@ def insert_barriers( i += 1 else: - raise ValueError("unexpected program_outline item type '%s'" + 
raise ValueError("unexpected outline item type '%s'" % type(sched_item).__name__) # }}} @@ -1853,11 +1853,11 @@ def generate_loop_outlines_inner(kernel, debug_args={}): from loopy.check import pre_schedule_checks pre_schedule_checks(kernel) - program_outline_count = 0 + outline_count = 0 debug = ScheduleDebugger(**debug_args) - preoutline = kernel.program_outline if ( + preoutline = kernel.outline if ( kernel.state == KernelState.SCHEDULED) else () preoutlined_inames = set( @@ -1906,7 +1906,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): entered_inames=frozenset(), enclosing_subkernel_inames=(), - program_outline=(), + outline=(), unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), scheduled_insn_ids=frozenset(), @@ -1978,7 +1978,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): logger.debug("%s: barrier insertion: done" % kernel.name) new_kernel = kernel.copy( - program_outline=gen_sched, + outline=gen_sched, state=KernelState.SCHEDULED) from loopy.outline.device_mapping import \ @@ -1993,7 +1993,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): debug.start() - program_outline_count += 1 + outline_count += 1 except KeyboardInterrupt: print() @@ -2004,14 +2004,14 @@ def generate_loop_outlines_inner(kernel, debug_args={}): raise debug.done_scheduling() - if not program_outline_count: + if not outline_count: print(75*"-") - print("ERROR: Sorry--loo.py did not find a program_outline for your kernel.") + print("ERROR: Sorry--loo.py did not find a outline for your kernel.") print(75*"-") print_longest_dead_end() - raise RuntimeError("no valid program_outlines found") + raise RuntimeError("no valid outlines found") - logger.info("%s: program_outline done" % kernel.name) + logger.info("%s: outline done" % kernel.name) # }}} @@ -2053,13 +2053,13 @@ def get_one_outlined_kernel(kernel): try: result = schedule_cache[sched_cache_key] - logger.debug("%s: program_outline cache hit" % kernel.name) + logger.debug("%s: outline 
cache hit" % kernel.name) from_cache = True except KeyError: pass if not from_cache: - with ProcessLogger(logger, "%s: program_outline" % kernel.name): + with ProcessLogger(logger, "%s: outline" % kernel.name): with MinRecursionLimitForScheduling(kernel): result = _get_one_outlined_kernel_inner(kernel) diff --git a/loopy/outline/device_mapping.py b/loopy/outline/device_mapping.py index 9b6662a10..20cb1ca7c 100644 --- a/loopy/outline/device_mapping.py +++ b/loopy/outline/device_mapping.py @@ -41,13 +41,13 @@ def map_schedule_onto_host_or_device(kernel): + kernel.target.device_program_name_suffix) if not kernel.target.split_kernel_at_global_barriers(): - new_program_outline = ( + new_outline = ( [CallKernel(kernel_name=device_prog_name_gen(), extra_args=[], extra_inames=[])] + - list(kernel.program_outline) + + list(kernel.outline) + [ReturnFromKernel(kernel_name=kernel.name)]) - kernel = kernel.copy(program_outline=new_program_outline) + kernel = kernel.copy(outline=new_outline) else: kernel = map_schedule_onto_host_or_device_impl( kernel, device_prog_name_gen) @@ -56,21 +56,21 @@ def map_schedule_onto_host_or_device(kernel): def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): - program_outline = kernel.program_outline - loop_bounds = get_block_boundaries(program_outline) + outline = kernel.outline + loop_bounds = get_block_boundaries(outline) # {{{ inner mapper function dummy_call = CallKernel(kernel_name="", extra_args=[], extra_inames=[]) dummy_return = ReturnFromKernel(kernel_name="") - def inner_mapper(start_idx, end_idx, new_program_outline): - program_outline_required_splitting = False + def inner_mapper(start_idx, end_idx, new_outline): + outline_required_splitting = False i = start_idx current_chunk = [] while i <= end_idx: - sched_item = program_outline[i] + sched_item = outline[i] if isinstance(sched_item, RunInstruction): current_chunk.append(sched_item) @@ -78,43 +78,43 @@ def map_schedule_onto_host_or_device_impl(kernel, 
device_prog_name_gen): elif isinstance(sched_item, EnterLoop): loop_end = loop_bounds[i] - inner_program_outline = [] + inner_outline = [] loop_required_splitting = inner_mapper( - i + 1, loop_end - 1, inner_program_outline) + i + 1, loop_end - 1, inner_outline) - start_item = program_outline[i] - end_item = program_outline[loop_end] + start_item = outline[i] + end_item = outline[loop_end] i = loop_end + 1 if loop_required_splitting: - program_outline_required_splitting = True + outline_required_splitting = True if current_chunk: - new_program_outline.extend( + new_outline.extend( [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) - new_program_outline.extend( + new_outline.extend( [start_item] + - inner_program_outline + + inner_outline + [end_item]) current_chunk = [] else: current_chunk.extend( [start_item] + - inner_program_outline + + inner_outline + [end_item]) elif isinstance(sched_item, Barrier): if sched_item.synchronization_kind == "global": # Wrap the current chunk into a kernel call. - program_outline_required_splitting = True + outline_required_splitting = True if current_chunk: - new_program_outline.extend( + new_outline.extend( [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) - new_program_outline.append(sched_item) + new_outline.append(sched_item) current_chunk = [] else: current_chunk.append(sched_item) @@ -123,45 +123,45 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): raise LoopyError("unexpected type of schedule item: %s" % type(sched_item).__name__) - if current_chunk and program_outline_required_splitting: - # Wrap remainder of program_outline into a kernel call. - new_program_outline.extend( + if current_chunk and outline_required_splitting: + # Wrap remainder of outline into a kernel call. 
+ new_outline.extend( [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) else: - new_program_outline.extend(current_chunk) + new_outline.extend(current_chunk) - return program_outline_required_splitting + return outline_required_splitting # }}} - new_program_outline = [] - split_kernel = inner_mapper(0, len(program_outline) - 1, new_program_outline) + new_outline = [] + split_kernel = inner_mapper(0, len(outline) - 1, new_outline) if not split_kernel: # Wrap everything into a kernel call. - new_program_outline = ( + new_outline = ( [dummy_call.copy()] + - new_program_outline + + new_outline + [dummy_return.copy()]) # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions inames = [] - for idx, sched_item in enumerate(new_program_outline): + for idx, sched_item in enumerate(new_outline): if isinstance(sched_item, CallKernel): last_kernel_name = device_prog_name_gen() - new_program_outline[idx] = sched_item.copy( + new_outline[idx] = sched_item.copy( kernel_name=last_kernel_name, extra_inames=list(inames)) elif isinstance(sched_item, ReturnFromKernel): - new_program_outline[idx] = sched_item.copy( + new_outline[idx] = sched_item.copy( kernel_name=last_kernel_name) elif isinstance(sched_item, EnterLoop): inames.append(sched_item.iname) elif isinstance(sched_item, LeaveLoop): inames.pop() - new_kernel = kernel.copy(program_outline=new_program_outline) + new_kernel = kernel.copy(outline=new_outline) return new_kernel diff --git a/loopy/outline/tools.py b/loopy/outline/tools.py index ebef9e59e..ea757b837 100644 --- a/loopy/outline/tools.py +++ b/loopy/outline/tools.py @@ -27,7 +27,7 @@ from loopy.kernel.data import AddressSpace # {{{ block boundary finder -def get_block_boundaries(program_outline): +def get_block_boundaries(outline): """ Return a dictionary mapping indices of :class:`loopy.outline.BlockBeginItem`s to @@ -36,7 +36,7 @@ def get_block_boundaries(program_outline): from loopy.outline import (BeginBlockItem, EndBlockItem) 
block_bounds = {} active_blocks = [] - for idx, sched_item in enumerate(program_outline): + for idx, sched_item in enumerate(outline): if isinstance(sched_item, BeginBlockItem): active_blocks.append(idx) elif isinstance(sched_item, EndBlockItem): @@ -75,12 +75,12 @@ def temporaries_written_in_subkernel(kernel, subkernel): def add_extra_args_to_schedule(kernel): """ Fill the `extra_args` fields in all the :class:`loopy.outline.CallKernel` - instructions in the program_outline with global temporaries. + instructions in the outline with global temporaries. """ - new_program_outline = [] + new_outline = [] from loopy.outline import CallKernel - for sched_item in kernel.program_outline: + for sched_item in kernel.outline: if isinstance(sched_item, CallKernel): subkernel = sched_item.kernel_name @@ -98,11 +98,11 @@ def add_extra_args_to_schedule(kernel): and tv not in sched_item.extra_args) - new_program_outline.append(sched_item.copy( + new_outline.append(sched_item.copy( extra_args=sched_item.extra_args + sorted(more_args))) else: - new_program_outline.append(sched_item) + new_outline.append(sched_item) - return kernel.copy(program_outline=new_program_outline) + return kernel.copy(outline=new_outline) # }}} diff --git a/loopy/statistics.py b/loopy/statistics.py index a78b25b33..063b0cd15 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1740,7 +1740,7 @@ def get_synchronization_map(knl, subgroup_size=None): else: return one - for sched_item in knl.program_outline: + for sched_item in knl.outline: if isinstance(sched_item, EnterLoop): if sched_item.iname: # (if not empty) iname_list.append(sched_item.iname) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 72905f3a6..b44fbefa7 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -515,7 +515,7 @@ class CFamilyASTBuilder(ASTBuilderBase): # whether this is the first device program in the schedule. 
is_first_dev_prog = codegen_state.is_generating_device_code for i in range(schedule_index): - if isinstance(kernel.program_outline[i], CallKernel): + if isinstance(kernel.outline[i], CallKernel): is_first_dev_prog = False break if is_first_dev_prog: @@ -604,7 +604,7 @@ class CFamilyASTBuilder(ASTBuilderBase): from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - subkernel = kernel.program_outline[schedule_index].kernel_name + subkernel = kernel.outline[schedule_index].kernel_name sub_knl_temps = ( temporaries_read_in_subkernel(kernel, subkernel) | temporaries_written_in_subkernel(kernel, subkernel)) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 331b64bf4..229e89f0b 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -249,7 +249,7 @@ class CUDACASTBuilder(CFamilyASTBuilder): _, local_grid_size = \ codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( - codegen_state.kernel.program_outline, schedule_index)) + codegen_state.kernel.outline, schedule_index)) from loopy.symbolic import get_dependencies if not get_dependencies(local_grid_size): diff --git a/loopy/target/execution.py b/loopy/target/execution.py index ef145fbe8..b540f4f58 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -754,7 +754,7 @@ class KernelExecutorBase(object): from loopy.type_inference import infer_unknown_types kernel = infer_unknown_types(kernel, expect_completion=True) - if kernel.program_outline is None: + if kernel.outline is None: from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 554d1dea3..e55651c8a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -407,7 +407,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): from loopy.outline import get_insn_ids_for_block_at _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( 
get_insn_ids_for_block_at( - codegen_state.kernel.program_outline, schedule_index)) + codegen_state.kernel.outline, schedule_index)) from loopy.symbolic import get_dependencies if not get_dependencies(local_sizes): diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 584d257ed..fba34db4a 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -64,17 +64,17 @@ class LivenessAnalysis(object): def __init__(self, kernel): self.kernel = kernel - self.program_outline = self.kernel.program_outline + self.outline = self.kernel.outline @memoize_method def get_successor_relation(self): successors = {} - block_bounds = get_block_boundaries(self.kernel.program_outline) + block_bounds = get_block_boundaries(self.kernel.outline) for idx, (item, next_item) in enumerate(zip( - reversed(self.program_outline), - reversed(self.program_outline + [None]))): - sched_idx = len(self.program_outline) - idx - 1 + reversed(self.outline), + reversed(self.outline + [None]))): + sched_idx = len(self.outline) - idx - 1 # Look at next_item if next_item is None: @@ -105,10 +105,10 @@ class LivenessAnalysis(object): return successors def get_gen_and_kill_sets(self): - gen = dict((idx, set()) for idx in range(len(self.program_outline))) - kill = dict((idx, set()) for idx in range(len(self.program_outline))) + gen = dict((idx, set()) for idx in range(len(self.outline))) + kill = dict((idx, set()) for idx in range(len(self.outline))) - for sched_idx, sched_item in enumerate(self.program_outline): + for sched_idx, sched_item in enumerate(self.outline): if not isinstance(sched_item, RunInstruction): continue insn = self.kernel.id_to_insn[sched_item.insn_id] @@ -141,14 +141,14 @@ class LivenessAnalysis(object): gen, kill = self.get_gen_and_kill_sets() # Fixed point iteration for liveness analysis - lr = LivenessResult.make_empty(len(self.program_outline)) + lr = LivenessResult.make_empty(len(self.outline)) prev_lr = None while prev_lr != lr: from copy import deepcopy prev_lr 
= deepcopy(lr) - for idx in range(len(self.program_outline) - 1, -1, -1): + for idx in range(len(self.outline) - 1, -1, -1): for succ in successors[idx]: lr[idx].live_out.update(lr[succ].live_in) lr[idx].live_in = gen[idx] | (lr[idx].live_out - kill[idx]) @@ -160,13 +160,13 @@ class LivenessAnalysis(object): def print_liveness(self): print(75 * "-") print("LIVE IN:") - for sched_idx, sched_item in enumerate(self.program_outline): + for sched_idx, sched_item in enumerate(self.outline): print("{item}: {{{vars}}}".format( item=sched_idx, vars=", ".join(sorted(self[sched_idx].live_in)))) print(75 * "-") print("LIVE OUT:") - for sched_idx, sched_item in enumerate(self.program_outline): + for sched_idx, sched_item in enumerate(self.outline): print("{item}: {{{vars}}}".format( item=sched_idx, vars=", ".join(sorted(self[sched_idx].live_out)))) @@ -316,7 +316,7 @@ class TemporarySaver(object): def subkernel_to_slice_indices(self): result = {} - for sched_item_idx, sched_item in enumerate(self.kernel.program_outline): + for sched_item_idx, sched_item in enumerate(self.kernel.outline): if isinstance(sched_item, CallKernel): start_idx = sched_item_idx elif isinstance(sched_item, ReturnFromKernel): @@ -331,7 +331,7 @@ class TemporarySaver(object): within_subkernel = False result = {} - for sched_item_idx, sched_item in enumerate(self.kernel.program_outline): + for sched_item_idx, sched_item in enumerate(self.kernel.outline): if isinstance(sched_item, CallKernel): within_subkernel = True result[sched_item.kernel_name] = frozenset(current_outer_inames) @@ -356,14 +356,14 @@ class TemporarySaver(object): try: pre_barrier = next(item for item in - self.kernel.program_outline[subkernel_start::-1] + self.kernel.outline[subkernel_start::-1] if is_global_barrier(item)).originating_insn_id except StopIteration: pre_barrier = None try: post_barrier = next(item for item in - self.kernel.program_outline[subkernel_end:] + self.kernel.outline[subkernel_end:] if 
is_global_barrier(item)).originating_insn_id except StopIteration: post_barrier = None @@ -751,7 +751,7 @@ def save_and_reload_temporaries(knl): from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - for sched_idx, sched_item in enumerate(knl.program_outline): + for sched_idx, sched_item in enumerate(knl.outline): if isinstance(sched_item, CallKernel): # Any written temporary that is live-out needs to be read into @@ -771,7 +771,7 @@ def save_and_reload_temporaries(knl): saver.reload(temporary, sched_item.kernel_name) elif isinstance(sched_item, ReturnFromKernel): - if sched_idx == len(knl.program_outline) - 1: + if sched_idx == len(knl.outline) - 1: # Kernel exit: nothing live interesting_temporaries = set() else: diff --git a/test/test_loopy.py b/test/test_loopy.py index 08ae85222..eaab2fc29 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2214,7 +2214,7 @@ def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): seen_barrier = False loop_level = 0 - for sched_item in knl.program_outline: + for sched_item in knl.outline: if isinstance(sched_item, RunInstruction): if sched_item.insn_id == id1: watch_for_barrier = True -- GitLab From ac28e5638465f8bd17a7041e8e3d429e3eda9019 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:02:06 -0600 Subject: [PATCH 08/56] renamed (un)scheduled_insn_ids->(un)outlined_insn_ids --- loopy/outline/__init__.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 377971c24..12b2081d8 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -600,9 +600,9 @@ class SchedulerState(ImmutableRecord): .. attribute:: outline - .. attribute:: scheduled_insn_ids + .. attribute:: outlined_insn_ids - .. attribute:: unscheduled_insn_ids + .. attribute:: unoutlined_insn_ids .. 
attribute:: preoutline @@ -780,7 +780,7 @@ def generate_loop_outlines_internal( if sched_state.insn_ids_to_try is None: insn_ids_to_try = sorted( # Non-preoutlined instructions go first. - sched_state.unscheduled_insn_ids - sched_state.preoutlined_insn_ids, + sched_state.unoutlined_insn_ids - sched_state.preoutlined_insn_ids, key=insn_sort_key, reverse=True) else: insn_ids_to_try = sched_state.insn_ids_to_try @@ -793,7 +793,7 @@ def generate_loop_outlines_internal( for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + is_ready = insn.depends_on <= sched_state.outlined_insn_ids if not is_ready: if debug_mode: @@ -802,7 +802,7 @@ def generate_loop_outlines_internal( # print("instruction '%s' is missing insn depedencies '%s'" % ( # format_insn(kernel, insn.id), ",".join( - # insn.depends_on - sched_state.scheduled_insn_ids))) + # insn.depends_on - sched_state.outlined_insn_ids))) pass continue @@ -930,8 +930,8 @@ def generate_loop_outlines_internal( (insn.id, orig_have & insn.boostable_into)) new_sched_state = sched_state.copy( - scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, - unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, + outlined_insn_ids=sched_state.outlined_insn_ids | iid_set, + unoutlined_insn_ids=sched_state.unoutlined_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, outline=( sched_state.outline + ( @@ -982,7 +982,7 @@ def generate_loop_outlines_internal( # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. - for insn_id in sched_state.unscheduled_insn_ids: + for insn_id in sched_state.unoutlined_insn_ids: insn = kernel.id_to_insn[insn_id] if last_entered_loop in kernel.insn_inames(insn): if debug_mode: @@ -992,7 +992,7 @@ def generate_loop_outlines_internal( # check if there's a dependency of insn that needs to be # outside of last_entered_loop. 
for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + sched_state.outlined_insn_ids): subdep = kernel.id_to_insn[insn_id] want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) @@ -1069,7 +1069,7 @@ def generate_loop_outlines_internal( # Find inames that are being referenced by as yet unscheduled instructions. needed_inames = set() - for insn_id in sched_state.unscheduled_insn_ids: + for insn_id in sched_state.unoutlined_insn_ids: needed_inames.update(kernel.insn_inames(insn_id)) needed_inames = (needed_inames @@ -1118,7 +1118,7 @@ def generate_loop_outlines_internal( if ( not sched_state.loop_insn_dep_map.get(iname, set()) - <= sched_state.scheduled_insn_ids): + <= sched_state.outlined_insn_ids): if debug_mode: print( "scheduling {iname} prohibited by loop dependency map " @@ -1128,7 +1128,7 @@ def generate_loop_outlines_internal( needed_insns=", ".join( sched_state.loop_insn_dep_map.get(iname, set()) - - sched_state.scheduled_insn_ids))) + sched_state.outlined_insn_ids))) continue @@ -1152,7 +1152,7 @@ def generate_loop_outlines_internal( & set(kernel.temporary_variables)): writer_insn, = kernel.writer_map()[domain_par] - if writer_insn not in sched_state.scheduled_insn_ids: + if writer_insn not in sched_state.outlined_insn_ids: data_dep_written = False if debug_mode: print("iname '%s' not scheduled because domain " @@ -1293,7 +1293,7 @@ def generate_loop_outlines_internal( if ( not sched_state.active_inames - and not sched_state.unscheduled_insn_ids + and not sched_state.unoutlined_insn_ids and not sched_state.preoutline): # if done, yield result debug.log_success(sched_state.outline) @@ -1908,8 +1908,8 @@ def generate_loop_outlines_inner(kernel, debug_args={}): outline=(), - unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), - scheduled_insn_ids=frozenset(), + unoutlined_insn_ids=set(insn.id for insn in kernel.instructions), + outlined_insn_ids=frozenset(), within_subkernel=kernel.state != 
KernelState.SCHEDULED, may_schedule_global_barriers=True, -- GitLab From 049fd2385ad83f42827b6cbd84fe7ad5b4db55ed Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:04:42 -0600 Subject: [PATCH 09/56] renamed ScheduleIndexInfo->OutlineIndexInfo --- loopy/codegen/control.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index daa02c81c..719596a47 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -269,7 +269,7 @@ def build_loop_nest(codegen_state, schedule_index): from pytools import ImmutableRecord - class ScheduleIndexInfo(ImmutableRecord): + class OutlineIndexInfo(ImmutableRecord): """ .. attribute:: schedule_index .. attribute:: admissible_cond_inames @@ -281,7 +281,7 @@ def build_loop_nest(codegen_state, schedule_index): from loopy.codegen.bounds import get_usable_inames_for_conditional sched_index_info_entries = [ - ScheduleIndexInfo( + OutlineIndexInfo( schedule_indices=[i], admissible_cond_inames=( get_usable_inames_for_conditional(kernel, i)), -- GitLab From af6e68bdbb4d7c2edf99888f1f4664e7aadd95db Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:06:46 -0600 Subject: [PATCH 10/56] renamed MinRecursionLimitForScheduling->MinRecursionLimitForOutlining --- loopy/outline/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 12b2081d8..ad947054f 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1819,7 +1819,7 @@ def insert_barriers( # }}} -class MinRecursionLimitForScheduling(MinRecursionLimit): +class MinRecursionLimitForOutlining(MinRecursionLimit): def __init__(self, kernel): MinRecursionLimit.__init__(self, len(kernel.instructions) * 2 + len(kernel.all_inames()) * 4) @@ -1832,14 +1832,14 @@ def generate_loop_outlines(kernel, debug_args={}): .. 
warning:: This function needs to be called inside (another layer) of a - :class:`MinRecursionLimitForScheduling` context manager, and the + :class:`MinRecursionLimitForOutlining` context manager, and the context manager needs to end *after* the last reference to the generators has gone out of scope. Otherwise, the high-recursion-limit generator chain may not be successfully garbage-collected and cause an internal error in the Python runtime. """ - with MinRecursionLimitForScheduling(kernel): + with MinRecursionLimitForOutlining(kernel): for sched in generate_loop_outlines_inner(kernel, debug_args=debug_args): yield sched @@ -2025,7 +2025,7 @@ def _get_one_outlined_kernel_inner(kernel): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the - # MinRecursionLimitForScheduling context manager in the surrounding + # MinRecursionLimitForOutlining context manager in the surrounding # function, because it possilby cannot be safely collected with a lower # recursion limit without crashing the Python runtime. 
# @@ -2060,7 +2060,7 @@ def get_one_outlined_kernel(kernel): if not from_cache: with ProcessLogger(logger, "%s: outline" % kernel.name): - with MinRecursionLimitForScheduling(kernel): + with MinRecursionLimitForOutlining(kernel): result = _get_one_outlined_kernel_inner(kernel) if CACHING_ENABLED and not from_cache: -- GitLab From bb61f36a15026f50db930c1de3620b74725bcc76 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:08:05 -0600 Subject: [PATCH 11/56] ScheduleDebugInput->OutlineDebugInput --- loopy/outline/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index ad947054f..93859b4b1 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -555,7 +555,7 @@ class ScheduleDebugger: self.start_time = time() -class ScheduleDebugInput(Exception): +class OutlineDebugInput(Exception): pass # }}} @@ -1289,7 +1289,7 @@ def generate_loop_outlines_internal( "or enter a number to examine outlines of a " "different length:") if inp: - raise ScheduleDebugInput(inp) + raise OutlineDebugInput(inp) if ( not sched_state.active_inames @@ -1950,7 +1950,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): sched_state, debug=debug, **schedule_gen_kwargs): pass - except ScheduleDebugInput as e: + except OutlineDebugInput as e: debug.debug_length = int(str(e)) continue -- GitLab From 14fc4684b14085f7b71ad37fd7de41bb044ab1db Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:09:07 -0600 Subject: [PATCH 12/56] SchedulerState->OutlinerState --- loopy/outline/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 93859b4b1..a779e41a2 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -563,7 +563,7 @@ class OutlineDebugInput(Exception): # {{{ scheduling algorithm -class SchedulerState(ImmutableRecord): +class OutlinerState(ImmutableRecord): 
""" .. attribute:: kernel @@ -1887,7 +1887,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) - sched_state = SchedulerState( + sched_state = OutlinerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( -- GitLab From a0869bf73d95ea260dddf48bb2e978141695c649 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:11:00 -0600 Subject: [PATCH 13/56] ScheduleDebugger->OutlineDebugger --- loopy/outline/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index a779e41a2..17c680e2e 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -496,7 +496,7 @@ def dump_schedule(kernel, outline): for i, line in enumerate(lines)) -class ScheduleDebugger: +class OutlineDebugger: def __init__(self, debug_length=None, interactive=True): self.longest_rejected_schedule = [] self.success_counter = 0 @@ -1855,7 +1855,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): outline_count = 0 - debug = ScheduleDebugger(**debug_args) + debug = OutlineDebugger(**debug_args) preoutline = kernel.outline if ( kernel.state == KernelState.SCHEDULED) else () -- GitLab From 87a644ef1b9b2435fe47445a6ff213321c2ca305 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:12:28 -0600 Subject: [PATCH 14/56] ScheduleItem->OutlineItem --- loopy/kernel/__init__.py | 2 +- loopy/outline/__init__.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 2c864d6f8..e79de8c6c 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -160,7 +160,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. 
attribute:: outline - *None* or a list of :class:`loopy.outline.ScheduleItem` + *None* or a list of :class:`loopy.outline.OutlineItem` .. attribute:: name .. attribute:: preambles diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 17c680e2e..5bd79e102 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -41,7 +41,7 @@ logger = logging.getLogger(__name__) # {{{ schedule items -class ScheduleItem(ImmutableRecord): +class OutlineItem(ImmutableRecord): __slots__ = [] def update_persistent_hash(self, key_hash, key_builder): @@ -52,11 +52,11 @@ class ScheduleItem(ImmutableRecord): key_builder.rec(key_hash, getattr(self, field_name)) -class BeginBlockItem(ScheduleItem): +class BeginBlockItem(OutlineItem): pass -class EndBlockItem(ScheduleItem): +class EndBlockItem(OutlineItem): pass @@ -68,7 +68,7 @@ class LeaveLoop(EndBlockItem): hash_fields = __slots__ = ["iname"] -class RunInstruction(ScheduleItem): +class RunInstruction(OutlineItem): hash_fields = __slots__ = ["insn_id"] @@ -80,7 +80,7 @@ class ReturnFromKernel(EndBlockItem): hash_fields = __slots__ = ["kernel_name"] -class Barrier(ScheduleItem): +class Barrier(OutlineItem): """ .. 
attribute:: comment -- GitLab From edd35962ca8b1c235030d287a190a1444802af54 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:40:01 -0600 Subject: [PATCH 15/56] change schedule->outline in comments and documentation --- MEMO | 16 +++++++------- doc/ref_kernel.rst | 2 +- doc/tutorial.rst | 10 ++++----- examples/python/global_barrier_removal.py | 2 +- loopy/codegen/control.py | 24 ++++++++++---------- loopy/kernel/__init__.py | 2 +- loopy/kernel/instruction.py | 2 +- loopy/outline/__init__.py | 27 ++++++++++++----------- loopy/outline/device_mapping.py | 2 +- loopy/statistics.py | 2 +- loopy/transform/save.py | 6 ++--- test/test_loopy.py | 4 ++-- 12 files changed, 50 insertions(+), 49 deletions(-) diff --git a/MEMO b/MEMO index f4e5c34e4..ce361fae9 100644 --- a/MEMO +++ b/MEMO @@ -78,8 +78,8 @@ Fixes: old inames may still be around, so the rewrite may or may not have to be applied. -- Group instructions by dependency/inames for scheduling, to - increase sched. scalability +- Group instructions by dependency/inames for outlining, to + increase outline scalability - What if no universally valid precompute base index expression is found? (test_intel_matrix_mul with n = 6*16, e.g.?) @@ -200,7 +200,7 @@ Dealt with - Make sure that variables that enter into loop bounds are only written exactly once. [DONE] - - Make sure that loop bound writes are scheduled before the relevant + - Make sure that loop bound writes are outlined before the relevant loops. [DONE] - add_prefetch tagging @@ -218,7 +218,7 @@ Dealt with - Allow complex-valued arithmetic, despite CL's best efforts. -- "No schedule found" debug help: +- "No outline found" debug help: - Find longest dead-end - Automatically report on what hinders progress there @@ -231,11 +231,11 @@ Dealt with - dim_{min,max} caching - Exhaust the search for a no-boost solution first, before looking - for a schedule with boosts. + for an outline with boosts. 
- Pick not just axis 0, but all axes by lowest available stride -- Scheduler tries too many boostability-related options +- Outliner tries too many boostability-related options - Automatically generate testing code vs. sequential. @@ -286,7 +286,7 @@ Dealt with - implemented_domain may end up being smaller than requested in cse evaluations--check that! -- Allow prioritization of loops in scheduling. +- Allow prioritization of loops in outlining. - Make axpy better. @@ -296,7 +296,7 @@ Dealt with - Flag, exploit idempotence -- Some things involving CSEs might be impossible to schedule +- Some things involving CSEs might be impossible to outline a[i,j] = cse(b[i]) * cse(c[j]) - Be smarter about automatic local axis choice diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 409cbef57..7662bf440 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -298,7 +298,7 @@ These are usually key-value pairs. The following attributes are recognized: * ``priority=integer`` sets the instructions priority to the value ``integer``. Instructions with higher priority will be scheduled sooner, - if possible. Note that the scheduler may still schedule a lower-priority + if possible. Note that the outliner may still outline a lower-priority instruction ahead of a higher-priority one if loop orders or dependencies require it. diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 9ff452f53..fa917ac43 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1203,7 +1203,7 @@ Here is what happens when we try to generate code for the kernel: loopy.diagnostic.MissingDefinitionError: temporary variable 'tmp' gets used in subkernel 'rotate_v2_0' without a definition (maybe you forgot to call loopy.save_and_reload_temporaries?) This happens due to the kernel splitting done by :mod:`loopy`. The splitting -happens when the instruction schedule is generated. To see the schedule, we +happens when the instruction outline is generated. 
To see the outline, we should call :func:`loopy.get_one_outlined_kernel`: >>> knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) @@ -1224,7 +1224,7 @@ should call :func:`loopy.get_one_outlined_kernel`: --------------------------------------------------------------------------- As the error message suggests, taking a look at the generated instruction -schedule will show that while ``tmp`` is assigned in the first kernel, the +outline will show that while ``tmp`` is assigned in the first kernel, the assignment to ``tmp`` is not seen by the second kernel. Because the temporary is in private memory, it does not persist across calls to device kernels (the same goes for local temporaries). @@ -1232,13 +1232,13 @@ goes for local temporaries). :mod:`loopy` provides a function called :func:`loopy.save_and_reload_temporaries` for the purpose of handling the task of saving and restoring temporary values across global barriers. This -function adds instructions to the kernel without scheduling them. That means +function adds instructions to the kernel without outlining them. That means that :func:`loopy.get_one_outlined_kernel` needs to be called one more time to -put those instructions into the schedule. +put those instructions into the outline. 
>>> knl = lp.get_one_outlined_kernel(lp.preprocess_kernel(knl)) >>> knl = lp.save_and_reload_temporaries(knl) - >>> knl = lp.get_one_outlined_kernel(knl) # Schedule added instructions + >>> knl = lp.get_one_outlined_kernel(knl) # Outline added instructions >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index a01c66b3c..8f31a59ee 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -26,7 +26,7 @@ knl = preprocess_kernel(knl) from loopy.outline import get_one_outlined_kernel knl = get_one_outlined_kernel(knl) -# map schedule onto host or device +# map outline onto host or device print(knl) cgr = lp.generate_code_v2(knl) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 719596a47..26de6d7b6 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -171,7 +171,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): lambda inner_cgs: generate_instruction_code(inner_cgs, insn)) else: - raise RuntimeError("unexpected schedule item type: %s" + raise RuntimeError("unexpected outline item type: %s" % type(sched_item)) @@ -184,7 +184,7 @@ def get_required_predicates(kernel, sched_index): elif isinstance(sched_item, RunInstruction): my_preds = kernel.id_to_insn[sched_item.insn_id].predicates else: - raise RuntimeError("unexpected schedule item type: %s" + raise RuntimeError("unexpected outline item type: %s" % type(sched_item)) if result is None: @@ -232,7 +232,7 @@ def build_loop_nest(codegen_state, schedule_index): result.append(inner) return merge_codegen_results(codegen_state, result) - # {{{ pass 1: pre-scan schedule for my schedule item's siblings' indices + # {{{ pass 1: pre-scan outline for my outline item's siblings' indices # i.e. go up to the next LeaveLoop, and skip over inner loops. 
@@ -258,14 +258,14 @@ def build_loop_nest(codegen_state, schedule_index): elif isinstance(sched_item, RunInstruction): i += 1 else: - raise RuntimeError("unexpected schedule item type: %s" + raise RuntimeError("unexpected outline item type: %s" % type(sched_item)) del i # }}} - # {{{ pass 2: find admissible conditional inames for each sibling schedule item + # {{{ pass 2: find admissible conditional inames for each sibling outline item from pytools import ImmutableRecord @@ -305,7 +305,7 @@ def build_loop_nest(codegen_state, schedule_index): # }}} - # {{{ pass 3: greedily group schedule items that share admissible inames + # {{{ pass 3: greedily group outline items that share admissible inames from pytools import memoize_method @@ -340,17 +340,17 @@ def build_loop_nest(codegen_state, schedule_index): from loopy.symbolic import get_dependencies # The rough plan here is that build_insn_group starts out with the - # entirety of the current schedule item's downward siblings (i.e. all + # entirety of the current outline item's downward siblings (i.e. all # the ones up to the next LeaveLoop). It will then iterate upward to # find the largest usable conditional hoist group. # # It will then call itself recursively, telling its recursive instances # to ignore the hoist group it just found by adding that group length - # to done_group_length. (It'll also chop the set of schedule indices + # to done_group_length. (It'll also chop the set of outline indices # considered down so that a callee cannot find a *longer* hoist group.) # # Upon return the hoist is wrapped around the returned code and - # build_insn_group calls itself for the remainder of schedule indices + # build_insn_group calls itself for the remainder of outline indices # that were not in the hoist group. 
if not sched_index_info_entries: @@ -361,9 +361,9 @@ def build_loop_nest(codegen_state, schedule_index): current_pred_set = (origin_si_entry.required_predicates - codegen_state.implemented_predicates) - # {{{ grow schedule item group + # {{{ grow outline item group - # Keep growing schedule item group as long as group fulfills minimum + # Keep growing outline item group as long as group fulfills minimum # size requirement. bounds_check_cache = BoundsCheckCache( @@ -453,7 +453,7 @@ def build_loop_nest(codegen_state, schedule_index): result = [] else: if group_length == 1: - # group only contains starting schedule item + # group only contains starting outline item def gen_code(inner_codegen_state): result = [] for i in origin_si_entry.schedule_indices: diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index e79de8c6c..5b0e8f4cc 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1222,7 +1222,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "iname-order", "get_visual_iname_order_embedding() could not determine a " "consistent iname nesting order. This is a possible indication " - "that the kernel may not schedule successfully, but for now " + "that the kernel may not outline successfully, but for now " "it only impacts printing of the kernel.") embedding = dict((iname, iname) for iname in self.all_inames()) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 8213c9584..fff9896d6 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -85,7 +85,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: priority - Scheduling priority, an integer. Higher means 'execute sooner'. + Outlining priority, an integer. Higher means 'execute sooner'. Default 0. .. 
rubric :: Synchronization diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 5bd79e102..e6f8da3f8 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -39,7 +39,7 @@ import logging logger = logging.getLogger(__name__) -# {{{ schedule items +# {{{ outline items class OutlineItem(ImmutableRecord): __slots__ = [] @@ -103,7 +103,7 @@ class Barrier(OutlineItem): # }}} -# {{{ schedule utilities +# {{{ outline utilities def gather_schedule_block(outline, start_idx): assert isinstance(outline[start_idx], BeginBlockItem) @@ -606,7 +606,7 @@ class OutlinerState(ImmutableRecord): .. attribute:: preoutline - A sequence of schedule items that must be inserted into the + A sequence of outline items that must be inserted into the outline, maintaining the same relative ordering. Newly scheduled items may interleave this sequence. @@ -773,7 +773,7 @@ def generate_loop_outlines_internal( insn = kernel.id_to_insn[insn_id] # Sort by insn.id as a last criterion to achieve deterministic - # schedule generation order. + # outline generation order. 
return (insn.priority, len(active_groups & insn.groups), insn.id) # Use previous instruction sorting result if it is available @@ -842,7 +842,7 @@ def generate_loop_outlines_internal( if next_preoutline_insn_id != insn_id: if debug_mode: - print("can't schedule '%s' because another preoutline " + print("can't outline '%s' because another preoutline " "instruction precedes it" % format_insn(kernel, insn.id)) is_ready = False @@ -855,13 +855,13 @@ def generate_loop_outlines_internal( insn.synchronization_kind == "global": if not sched_state.may_schedule_global_barriers: if debug_mode: - print("can't schedule '%s' because global barriers are " + print("can't outline '%s' because global barriers are " "not currently allowed" % format_insn(kernel, insn.id)) is_ready = False else: if not sched_state.within_subkernel: if debug_mode: - print("can't schedule '%s' because not within subkernel" + print("can't outline '%s' because not within subkernel" % format_insn(kernel, insn.id)) is_ready = False @@ -887,7 +887,7 @@ def generate_loop_outlines_internal( # }}} if is_ready and debug_mode: - print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + print("ready to outline '%s'" % format_insn(kernel, insn.id)) if is_ready and not debug_mode: iid_set = frozenset([insn.id]) @@ -1165,7 +1165,8 @@ def generate_loop_outlines_internal( # }}} - # {{{ determine if that gets us closer to being able to schedule an insn + # {{{ determine if that gets us closer to being able to add an insn to + # outline usefulness = None # highest insn priority enabled by iname @@ -1192,7 +1193,7 @@ def generate_loop_outlines_internal( # {{{ tier building - # Build priority tiers. If a outline is found in the first tier, then + # Build priority tiers. If an outline is found in the first tier, then # loops in the second are not even tried (and so on). 
loop_priority_set = set().union(*[set(prio) for prio in @@ -1847,7 +1848,7 @@ def generate_loop_outlines(kernel, debug_args={}): def generate_loop_outlines_inner(kernel, debug_args={}): from loopy.kernel import KernelState if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): - raise LoopyError("cannot schedule a kernel that has not been " + raise LoopyError("cannot outline a kernel that has not been " "preprocessed") from loopy.check import pre_schedule_checks @@ -1931,7 +1932,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): def print_longest_dead_end(): if debug.interactive: print("Loo.py will now show you the scheduler state at the point") - print("where the longest (dead-end) schedule was generated, in the") + print("where the longest (dead-end) outline was generated, in the") print("the hope that some of this makes sense and helps you find") print("the issue.") print() @@ -2006,7 +2007,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): debug.done_scheduling() if not outline_count: print(75*"-") - print("ERROR: Sorry--loo.py did not find a outline for your kernel.") + print("ERROR: Sorry--loo.py did not find an outline for your kernel.") print(75*"-") print_longest_dead_end() raise RuntimeError("no valid outlines found") diff --git a/loopy/outline/device_mapping.py b/loopy/outline/device_mapping.py index 20cb1ca7c..4eca2409f 100644 --- a/loopy/outline/device_mapping.py +++ b/loopy/outline/device_mapping.py @@ -120,7 +120,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): current_chunk.append(sched_item) i += 1 else: - raise LoopyError("unexpected type of schedule item: %s" + raise LoopyError("unexpected type of outline item: %s" % type(sched_item).__name__) if current_chunk and outline_required_splitting: diff --git a/loopy/statistics.py b/loopy/statistics.py index 063b0cd15..ac3d35589 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1761,7 +1761,7 @@ def 
get_synchronization_map(knl, subgroup_size=None): pass else: - raise LoopyError("unexpected schedule item: %s" + raise LoopyError("unexpected outline item: %s" % type(sched_item).__name__) return result diff --git a/loopy/transform/save.py b/loopy/transform/save.py index fba34db4a..4b677fa8b 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -87,7 +87,7 @@ class LivenessAnalysis(object): CallKernel, ReturnFromKernel, Barrier)): after = set([sched_idx + 1]) else: - raise LoopyError("unexpected type of schedule item: {ty}" + raise LoopyError("unexpected type of outline item: {ty}" .format(ty=type(next_item).__name__)) # Look at item @@ -97,7 +97,7 @@ class LivenessAnalysis(object): after |= set([loop_begin]) elif not isinstance(item, (EnterLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier)): - raise LoopyError("unexpected type of schedule item: {ty}" + raise LoopyError("unexpected type of outline item: {ty}" .format(ty=type(item).__name__)) successors[sched_idx] = after @@ -727,7 +727,7 @@ def save_and_reload_temporaries(knl): Add instructions to save and reload temporary variables that are live across kernel calls. 
- The basic code transformation turns schedule segments:: + The basic code transformation turns outline segments:: t = <...> diff --git a/test/test_loopy.py b/test/test_loopy.py index eaab2fc29..f55e53ec0 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1068,7 +1068,7 @@ def test_kernel_splitting(ctx_factory): from loopy.outline import get_one_outlined_kernel knl = get_one_outlined_kernel(knl) - # map schedule onto host or device + # map outline onto host or device print(knl) cgr = lp.generate_code_v2(knl) @@ -1109,7 +1109,7 @@ def test_kernel_splitting_with_loop(ctx_factory): from loopy.outline import get_one_outlined_kernel knl = get_one_outlined_kernel(knl) - # map schedule onto host or device + # map outline onto host or device print(knl) cgr = lp.generate_code_v2(knl) -- GitLab From 005301eb3fae4a5541539693b66fb6e5a5aeb0bf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:47:18 -0600 Subject: [PATCH 16/56] has_schedulable_iname_nesting->has_outlinable_iname_nesting --- loopy/__init__.py | 4 ++-- loopy/check.py | 10 +++++----- loopy/transform/iname.py | 6 +++--- test/test_loopy.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 33cf5e3e0..de3ff7bd5 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -77,7 +77,7 @@ from loopy.transform.iname import ( split_reduction_inward, split_reduction_outward, affine_map_inames, find_unused_axis_tag, make_reduction_inames_unique, - has_schedulable_iname_nesting, get_iname_duplication_options, + has_outlinable_iname_nesting, get_iname_duplication_options, add_inames_to_insn) from loopy.transform.instruction import ( @@ -194,7 +194,7 @@ __all__ = [ "split_reduction_inward", "split_reduction_outward", "affine_map_inames", "find_unused_axis_tag", "make_reduction_inames_unique", - "has_schedulable_iname_nesting", "get_iname_duplication_options", + "has_outlinable_iname_nesting", "get_iname_duplication_options", 
"add_inames_to_insn", "add_prefetch", "change_arg_to_image", diff --git a/loopy/check.py b/loopy/check.py index dd3620647..592a4d43b 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -414,12 +414,12 @@ def check_write_destinations(kernel): # }}} -# {{{ check_has_schedulable_iname_nesting +# {{{ check_has_outlinable_iname_nesting -def check_has_schedulable_iname_nesting(kernel): - from loopy.transform.iname import (has_schedulable_iname_nesting, +def check_has_outlinable_iname_nesting(kernel): + from loopy.transform.iname import (has_outlinable_iname_nesting, get_iname_duplication_options) - if not has_schedulable_iname_nesting(kernel): + if not has_outlinable_iname_nesting(kernel): import itertools as it opt = get_iname_duplication_options(kernel) opt_str = "\n".join("* Duplicate %s within instructions %s" % (i, w) @@ -662,7 +662,7 @@ def pre_schedule_checks(kernel): check_for_data_dependent_parallel_bounds(kernel) check_bounds(kernel) check_write_destinations(kernel) - check_has_schedulable_iname_nesting(kernel) + check_has_outlinable_iname_nesting(kernel) check_variable_access_ordered(kernel) logger.debug("%s: pre-schedule check: done" % kernel.name) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 96c8252ef..7f469031c 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -52,7 +52,7 @@ __doc__ = """ .. autofunction:: get_iname_duplication_options -.. autofunction:: has_schedulable_iname_nesting +.. autofunction:: has_outlinable_iname_nesting .. autofunction:: prioritize_loops @@ -1013,7 +1013,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): * duplicating j in instruction i2 * duplicating i in instruction i2 and i3 - Use :func:`has_schedulable_iname_nesting` to decide whether an iname needs to be + Use :func:`has_outlinable_iname_nesting` to decide whether an iname needs to be duplicated in a given kernel. 
""" from loopy.kernel.data import ConcurrentTag @@ -1080,7 +1080,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): yield iname, within -def has_schedulable_iname_nesting(knl): +def has_outlinable_iname_nesting(knl): """ :returns: a :class:`bool` indicating whether this kernel needs an iname duplication in order to be schedulable. diff --git a/test/test_loopy.py b/test/test_loopy.py index f55e53ec0..faa649484 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1609,12 +1609,12 @@ def test_unschedulable_kernel_detection(): knl = lp.preprocess_kernel(knl) # Check that loopy can detect the unschedulability of the kernel - assert not lp.has_schedulable_iname_nesting(knl) + assert not lp.has_outlinable_iname_nesting(knl) assert len(list(lp.get_iname_duplication_options(knl))) == 4 for inames, insns in lp.get_iname_duplication_options(knl): fixed_knl = lp.duplicate_inames(knl, inames, insns) - assert lp.has_schedulable_iname_nesting(fixed_knl) + assert lp.has_outlinable_iname_nesting(fixed_knl) knl = lp.make_kernel(["{[i,j,k,l,m]:0<=i,j,k,l,m Date: Mon, 24 Feb 2020 06:49:07 -0600 Subject: [PATCH 17/56] all_schedulable_insns->all_outlinable_insns --- loopy/check.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 592a4d43b..07e1f5d2a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -854,20 +854,20 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): def check_that_all_insns_are_scheduled(kernel): - all_schedulable_insns = set(insn.id for insn in kernel.instructions) + all_outlinable_insns = set(insn.id for insn in kernel.instructions) from loopy.outline import sched_item_to_insn_id scheduled_insns = set( insn_id for sched_item in kernel.outline for insn_id in sched_item_to_insn_id(sched_item)) - assert scheduled_insns <= all_schedulable_insns + assert scheduled_insns <= all_outlinable_insns - if scheduled_insns < all_schedulable_insns: + if scheduled_insns 
< all_outlinable_insns: from loopy.diagnostic import UnscheduledInstructionError raise UnscheduledInstructionError( "unscheduled instructions: '%s'" - % ', '.join(all_schedulable_insns - scheduled_insns)) + % ', '.join(all_outlinable_insns - scheduled_insns)) # }}} -- GitLab From 7f225f2a644366dad52d23f66165fd2ea5810a45 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:50:06 -0600 Subject: [PATCH 18/56] change test_unschedulable_kernel_detection->test_unoutlinable_kernel_detection --- test/test_loopy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index faa649484..c0466eb69 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1598,7 +1598,7 @@ def test_call_with_options(): # }}} -def test_unschedulable_kernel_detection(): +def test_unoutlinable_kernel_detection(): knl = lp.make_kernel(["{[i,j]:0<=i,j Date: Mon, 24 Feb 2020 06:56:28 -0600 Subject: [PATCH 19/56] change schedulable->outlinable in various comments and strings --- loopy/check.py | 2 +- loopy/options.py | 2 +- loopy/outline/__init__.py | 2 +- loopy/transform/iname.py | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 07e1f5d2a..72ab14c0a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -424,7 +424,7 @@ def check_has_outlinable_iname_nesting(kernel): opt = get_iname_duplication_options(kernel) opt_str = "\n".join("* Duplicate %s within instructions %s" % (i, w) for i, w in it.islice(opt, 3)) - raise LoopyError("Kernel does not have a schedulable iname nesting. " + raise LoopyError("Kernel does not have an outlinable iname nesting. " "In order for there to exist a feasible loop nesting, you " "may need to duplicate an iname. To do so, call " "loopy.duplicate_iname. 
Use loopy.get_iname_duplication_options " diff --git a/loopy/options.py b/loopy/options.py index 63089d94d..e3f3dacf4 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -93,7 +93,7 @@ class Options(ImmutableRecord): Ignore the boostable_into field of the kernel, when determining whether an iname duplication is necessary - for the kernel to be schedulable. + for the kernel to be outlinable. .. attribute:: check_dep_resolution diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index e6f8da3f8..ea6e0834d 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -763,7 +763,7 @@ def generate_loop_outlines_internal( # {{{ see if any insns are ready to be scheduled now - # Also take note of insns that have a chance of being schedulable inside + # Also take note of insns that have a chance of being outlinable inside # the current loop nest, in this set: reachable_insn_ids = set() diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 7f469031c..0ca73d6c0 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -993,10 +993,10 @@ def get_iname_duplication_options(knl, use_boostable_into=False): of an iname is necessary to ensure the schedulability of the kernel. Duplication options are returned as tuples (iname, within) as understood by :func:`duplicate_inames`. There is no guarantee, that the - transformed kernel will be schedulable, because multiple duplications + transformed kernel will be outlinable, because multiple duplications of iname may be necessary. - Some kernels require the duplication of inames in order to be schedulable, as the + Some kernels require the duplication of inames in order to be outlinable, as the forced iname dependencies define an over-determined problem to the scheduler. 
Consider the following minimal example: @@ -1059,7 +1059,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): from warnings import warn from loopy.diagnostic import LoopyWarning warn("Kernel '%s' required the deprecated 'boostable_into' " - "instruction attribute in order to be schedulable!" % knl.name, + "instruction attribute in order to be outlinable!" % knl.name, LoopyWarning) # Return to avoid yielding the duplication @@ -1083,7 +1083,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): def has_outlinable_iname_nesting(knl): """ :returns: a :class:`bool` indicating whether this kernel needs - an iname duplication in order to be schedulable. + an iname duplication in order to be outlinable. """ return not bool(next(get_iname_duplication_options(knl), False)) -- GitLab From e40c4b5ea20146b3de21fccd340b55ff9f96feea Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 06:59:36 -0600 Subject: [PATCH 20/56] changed UnscheduledInstructionError->UnoutlinedInstructionError --- loopy/check.py | 4 ++-- loopy/diagnostic.py | 2 +- test/test_loopy.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 72ab14c0a..e06daa4f8 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -864,8 +864,8 @@ def check_that_all_insns_are_scheduled(kernel): assert scheduled_insns <= all_outlinable_insns if scheduled_insns < all_outlinable_insns: - from loopy.diagnostic import UnscheduledInstructionError - raise UnscheduledInstructionError( + from loopy.diagnostic import UnoutlinedInstructionError + raise UnoutlinedInstructionError( "unscheduled instructions: '%s'" % ', '.join(all_outlinable_insns - scheduled_insns)) diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 561bbc7cc..eb09c9dda 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -100,7 +100,7 @@ class MissingDefinitionError(LoopyError): pass -class UnscheduledInstructionError(LoopyError): +class
UnoutlinedInstructionError(LoopyError): pass diff --git a/test/test_loopy.py b/test/test_loopy.py index c0466eb69..4b0a1a1aa 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2093,8 +2093,8 @@ def test_unscheduled_insn_detection(): insn1, = lp.find_instructions(knl, "id:insn1") knl.instructions.append(insn1.copy(id="insn2")) - from loopy.diagnostic import UnscheduledInstructionError - with pytest.raises(UnscheduledInstructionError): + from loopy.diagnostic import UnoutlinedInstructionError + with pytest.raises(UnoutlinedInstructionError): lp.generate_code(knl) -- GitLab From 7e96f0740d09081eb090d5278884de6ec12bd7fc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:01:11 -0600 Subject: [PATCH 21/56] renamed check_that_all_insns_are_scheduled->check_that_all_insns_are_outlined --- loopy/check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index e06daa4f8..f52bbb36a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -852,7 +852,7 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): # {{{ check that all instructions are scheduled -def check_that_all_insns_are_scheduled(kernel): +def check_that_all_insns_are_outlined(kernel): all_outlinable_insns = set(insn.id for insn in kernel.instructions) from loopy.outline import sched_item_to_insn_id @@ -923,7 +923,7 @@ def pre_codegen_checks(kernel): check_for_unused_hw_axes_in_insns(kernel) check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel) check_that_temporaries_are_defined_in_subkernels_where_used(kernel) - check_that_all_insns_are_scheduled(kernel) + check_that_all_insns_are_outlined(kernel) kernel.target.pre_codegen_check(kernel) check_that_shapes_and_strides_are_arguments(kernel) -- GitLab From 67369eadbda6cd908d2974a8dadcf408e844d75c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:02:35 -0600 Subject: [PATCH 22/56] renamed test_unscheduled_insn_detection->test_unoutlined_insn_detection 
--- test/test_loopy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 4b0a1a1aa..127daec6b 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2081,7 +2081,7 @@ def test_tight_loop_bounds_codegen(): assert for_loop in cgr.device_code() -def test_unscheduled_insn_detection(): +def test_unoutlined_insn_detection(): knl = lp.make_kernel( "{ [i]: 0 <= i < 10 }", """ -- GitLab From 2189d082a527841a7732b38a1f68eb607a43521f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:03:58 -0600 Subject: [PATCH 23/56] renamed scheduled_insns->outlined_insns --- loopy/check.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index f52bbb36a..8fbb6842a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -856,18 +856,18 @@ def check_that_all_insns_are_outlined(kernel): all_outlinable_insns = set(insn.id for insn in kernel.instructions) from loopy.outline import sched_item_to_insn_id - scheduled_insns = set( + outlined_insns = set( insn_id for sched_item in kernel.outline for insn_id in sched_item_to_insn_id(sched_item)) - assert scheduled_insns <= all_outlinable_insns + assert outlined_insns <= all_outlinable_insns - if scheduled_insns < all_outlinable_insns: + if outlined_insns < all_outlinable_insns: from loopy.diagnostic import UnoutlinedInstructionError raise UnoutlinedInstructionError( "unscheduled instructions: '%s'" - % ', '.join(all_outlinable_insns - scheduled_insns)) + % ', '.join(all_outlinable_insns - outlined_insns)) # }}} -- GitLab From 18d146bcf89a657e2ff4831600df18c307bf6dbe Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:08:59 -0600 Subject: [PATCH 24/56] renamed schedule_index->outline_index --- loopy/codegen/__init__.py | 18 +++++++++--------- loopy/codegen/control.py | 22 +++++++++++----------- loopy/codegen/loop.py | 6 +++--- loopy/codegen/result.py | 16 ++++++++-------- loopy/target/__init__.py | 12 
++++++------ loopy/target/c/__init__.py | 16 ++++++++-------- loopy/target/cuda.py | 6 +++--- loopy/target/ispc.py | 2 +- loopy/target/numba.py | 2 +- loopy/target/opencl.py | 6 +++--- loopy/target/pyopencl.py | 4 ++-- loopy/target/python.py | 6 +++--- 12 files changed, 58 insertions(+), 58 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 360b77e43..f5cf1a1f9 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -186,7 +186,7 @@ class CodeGenerationState(object): or the name of the device program currently being generated. - .. attribute:: schedule_index_end + .. attribute:: outline_index_end """ def __init__(self, kernel, @@ -196,7 +196,7 @@ class CodeGenerationState(object): vectorization_info=None, var_name_generator=None, is_generating_device_code=None, gen_program_name=None, - schedule_index_end=None): + outline_index_end=None): self.kernel = kernel self.implemented_data_info = implemented_data_info self.implemented_domain = implemented_domain @@ -210,7 +210,7 @@ class CodeGenerationState(object): self.var_name_generator = var_name_generator self.is_generating_device_code = is_generating_device_code self.gen_program_name = gen_program_name - self.schedule_index_end = schedule_index_end + self.outline_index_end = outline_index_end # {{{ copy helpers @@ -219,7 +219,7 @@ class CodeGenerationState(object): var_subst_map=None, vectorization_info=None, is_generating_device_code=None, gen_program_name=None, - schedule_index_end=None): + outline_index_end=None): if kernel is None: kernel = self.kernel @@ -239,8 +239,8 @@ class CodeGenerationState(object): if gen_program_name is None: gen_program_name = self.gen_program_name - if schedule_index_end is None: - schedule_index_end = self.schedule_index_end + if outline_index_end is None: + outline_index_end = self.outline_index_end return CodeGenerationState( kernel=kernel, @@ -257,7 +257,7 @@ class CodeGenerationState(object): 
var_name_generator=self.var_name_generator, is_generating_device_code=is_generating_device_code, gen_program_name=gen_program_name, - schedule_index_end=schedule_index_end) + outline_index_end=outline_index_end) def copy_and_assign(self, name, value): """Make a copy of self with variable *name* fixed to *value*.""" @@ -470,12 +470,12 @@ def generate_code_v2(kernel): kernel.target.host_program_name_prefix + kernel.name + kernel.target.host_program_name_suffix), - schedule_index_end=len(kernel.outline)) + outline_index_end=len(kernel.outline)) from loopy.codegen.result import generate_host_or_device_program codegen_result = generate_host_or_device_program( codegen_state, - schedule_index=0) + outline_index=0) device_code_str = codegen_result.device_code() diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 26de6d7b6..3d40b3379 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -32,11 +32,11 @@ from loopy.outline import ( from loopy.diagnostic import LoopyError -def synthesize_idis_for_extra_args(kernel, schedule_index): +def synthesize_idis_for_extra_args(kernel, outline_index): """ :returns: A list of :class:`loopy.codegen.ImplementedDataInfo` """ - sched_item = kernel.outline[schedule_index] + sched_item = kernel.outline[outline_index] from loopy.codegen import ImplementedDataInfo from loopy.kernel.data import InameArg, AddressSpace @@ -74,14 +74,14 @@ def generate_code_for_sched_index(codegen_state, sched_index): from loopy.outline import (gather_schedule_block, get_insn_ids_for_block_at) _, past_end_i = gather_schedule_block(kernel.outline, sched_index) - assert past_end_i <= codegen_state.schedule_index_end + assert past_end_i <= codegen_state.outline_index_end extra_args = synthesize_idis_for_extra_args(kernel, sched_index) new_codegen_state = codegen_state.copy( is_generating_device_code=True, gen_program_name=sched_item.kernel_name, - schedule_index_end=past_end_i-1, + outline_index_end=past_end_i-1, 
implemented_data_info=(codegen_state.implemented_data_info + extra_args)) @@ -217,7 +217,7 @@ def group_by(l, key, merge): return result -def build_loop_nest(codegen_state, schedule_index): +def build_loop_nest(codegen_state, outline_index): # Most of the complexity of this function goes towards finding groups of # instructions that can be nested inside a shared conditional. @@ -227,7 +227,7 @@ def build_loop_nest(codegen_state, schedule_index): # some work about hoisting conditionals and directly go into recursion. if not codegen_state.ast_builder.can_implement_conditionals: result = [] - inner = generate_code_for_sched_index(codegen_state, schedule_index) + inner = generate_code_for_sched_index(codegen_state, outline_index) if inner is not None: result.append(inner) return merge_codegen_results(codegen_state, result) @@ -238,8 +238,8 @@ def build_loop_nest(codegen_state, schedule_index): my_sched_indices = [] - i = schedule_index - while i < codegen_state.schedule_index_end: + i = outline_index + while i < codegen_state.outline_index_end: sched_item = kernel.outline[i] if isinstance(sched_item, LeaveLoop): @@ -249,8 +249,8 @@ def build_loop_nest(codegen_state, schedule_index): if isinstance(sched_item, (EnterLoop, CallKernel)): _, i = gather_schedule_block(kernel.outline, i) - assert i <= codegen_state.schedule_index_end, \ - "schedule block extends beyond schedule_index_end" + assert i <= codegen_state.outline_index_end, \ + "schedule block extends beyond outline_index_end" elif isinstance(sched_item, Barrier): i += 1 @@ -271,7 +271,7 @@ def build_loop_nest(codegen_state, schedule_index): class OutlineIndexInfo(ImmutableRecord): """ - .. attribute:: schedule_index + .. attribute:: outline_index .. attribute:: admissible_cond_inames .. attribute:: required_predicates .. 
attribute:: used_inames_within diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 8f2a51ea5..fd7bc7b5a 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -226,7 +226,7 @@ def intersect_kernel_with_slab(kernel, slab, iname): # {{{ hw-parallel loop -def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, +def set_up_hw_parallel_loops(codegen_state, outline_index, next_func, hw_inames_left=None): kernel = codegen_state.kernel @@ -235,7 +235,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.outline import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at( - kernel.outline, schedule_index) + kernel.outline, outline_index) if hw_inames_left is None: all_inames_by_insns = set() @@ -331,7 +331,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, .copy(kernel=slabbed_kernel)) inner = set_up_hw_parallel_loops( - new_codegen_state, schedule_index, next_func, + new_codegen_state, outline_index, next_func, hw_inames_left) result.append(inner) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 1c5957327..b59dd0563 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -273,9 +273,9 @@ def wrap_in_if(codegen_state, condition_exprs, inner): # {{{ program generation top-level -def generate_host_or_device_program(codegen_state, schedule_index): +def generate_host_or_device_program(codegen_state, outline_index): ast_builder = codegen_state.ast_builder - temp_decls = ast_builder.get_temporary_decls(codegen_state, schedule_index) + temp_decls = ast_builder.get_temporary_decls(codegen_state, outline_index) from functools import partial @@ -283,15 +283,15 @@ def generate_host_or_device_program(codegen_state, schedule_index): if codegen_state.is_generating_device_code: from loopy.outline import CallKernel assert isinstance( - codegen_state.kernel.outline[schedule_index], CallKernel) + codegen_state.kernel.outline[outline_index], 
CallKernel) from loopy.codegen.loop import set_up_hw_parallel_loops codegen_result = set_up_hw_parallel_loops( - codegen_state, schedule_index, + codegen_state, outline_index, next_func=partial(build_loop_nest, - schedule_index=schedule_index + 1)) + outline_index=outline_index + 1)) else: - codegen_result = build_loop_nest(codegen_state, schedule_index) + codegen_result = build_loop_nest(codegen_state, outline_index) codegen_result = merge_codegen_results( codegen_state, @@ -303,11 +303,11 @@ def generate_host_or_device_program(codegen_state, schedule_index): cur_prog = codegen_result.current_program(codegen_state) body_ast = cur_prog.ast fdecl_ast = ast_builder.get_function_declaration( - codegen_state, codegen_result, schedule_index) + codegen_state, codegen_result, outline_index) fdef_ast = ast_builder.get_function_definition( codegen_state, codegen_result, - schedule_index, fdecl_ast, body_ast) + outline_index, fdecl_ast, body_ast) codegen_result = codegen_result.with_new_program( codegen_state, diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 73d2a6328..952326e75 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -165,17 +165,17 @@ class ASTBuilderBase(object): # {{{ code generation guts def get_function_definition(self, codegen_state, codegen_result, - schedule_index, function_decl, function_body): + outline_index, function_decl, function_body): raise NotImplementedError def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): raise NotImplementedError def generate_top_of_body(self, codegen_state): return [] - def get_temporary_decls(self, codegen_state, schedule_index): + def get_temporary_decls(self, codegen_state, outline_index): raise NotImplementedError def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args): @@ -261,14 +261,14 @@ class _DummyASTBlock(object): class DummyHostASTBuilder(ASTBuilderBase): def get_function_definition(self, 
codegen_state, codegen_result, - schedule_index, function_decl, function_body): + outline_index, function_decl, function_body): return function_body def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): return None - def get_temporary_decls(self, codegen_state, schedule_index): + def get_temporary_decls(self, codegen_state, outline_index): return [] def get_expression_to_code_mapper(self, codegen_state): diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index b44fbefa7..1a146e01c 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -494,7 +494,7 @@ class CFamilyASTBuilder(ASTBuilderBase): # {{{ code generation def get_function_definition(self, codegen_state, codegen_result, - schedule_index, + outline_index, function_decl, function_body): kernel = codegen_state.kernel @@ -514,7 +514,7 @@ class CFamilyASTBuilder(ASTBuilderBase): # the first device program. `is_first_dev_prog` determines # whether this is the first device program in the schedule. 
is_first_dev_prog = codegen_state.is_generating_device_code - for i in range(schedule_index): + for i in range(outline_index): if isinstance(kernel.outline[i], CallKernel): is_first_dev_prog = False break @@ -531,7 +531,7 @@ class CFamilyASTBuilder(ASTBuilderBase): index_dtype=kernel.index_dtype) decl = self.wrap_global_constant( self.get_temporary_decl( - codegen_state, schedule_index, tv, + codegen_state, outline_index, tv, decl_info)) if tv.initializer is not None: @@ -568,7 +568,7 @@ class CFamilyASTBuilder(ASTBuilderBase): return var_descr.get_arg_decl(self) def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): from cgen import FunctionDeclaration, Value name = codegen_result.current_program(codegen_state).name @@ -584,7 +584,7 @@ class CFamilyASTBuilder(ASTBuilderBase): def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args): return None - def get_temporary_decls(self, codegen_state, schedule_index): + def get_temporary_decls(self, codegen_state, outline_index): from loopy.kernel.data import AddressSpace kernel = codegen_state.kernel @@ -604,7 +604,7 @@ class CFamilyASTBuilder(ASTBuilderBase): from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - subkernel = kernel.outline[schedule_index].kernel_name + subkernel = kernel.outline[outline_index].kernel_name sub_knl_temps = ( temporaries_read_in_subkernel(kernel, subkernel) | temporaries_written_in_subkernel(kernel, subkernel)) @@ -621,7 +621,7 @@ class CFamilyASTBuilder(ASTBuilderBase): tv.name in sub_knl_temps): decl = self.wrap_temporary_decl( self.get_temporary_decl( - codegen_state, schedule_index, tv, idi), + codegen_state, outline_index, tv, idi), tv.address_space) if tv.initializer is not None: @@ -740,7 +740,7 @@ class CFamilyASTBuilder(ASTBuilderBase): from loopy.target.c.codegen.expression import CExpressionToCodeMapper return CExpressionToCodeMapper() - def get_temporary_decl(self, 
codegen_state, schedule_index, temp_var, decl_info): + def get_temporary_decl(self, codegen_state, outline_index, temp_var, decl_info): temp_var_decl = POD(self, decl_info.dtype, decl_info.name) if temp_var.read_only: diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 229e89f0b..bbd7edf24 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -230,9 +230,9 @@ class CUDACASTBuilder(CFamilyASTBuilder): # {{{ top-level codegen def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): fdecl = super(CUDACASTBuilder, self).get_function_declaration( - codegen_state, codegen_result, schedule_index) + codegen_state, codegen_result, outline_index) from loopy.target.c import FunctionDeclarationWrapper assert isinstance(fdecl, FunctionDeclarationWrapper) @@ -249,7 +249,7 @@ class CUDACASTBuilder(CFamilyASTBuilder): _, local_grid_size = \ codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( - codegen_state.kernel.outline, schedule_index)) + codegen_state.kernel.outline, outline_index)) from loopy.symbolic import get_dependencies if not get_dependencies(local_grid_size): diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index eb0157bf8..01a65b3cb 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -231,7 +231,7 @@ class ISPCASTBuilder(CFamilyASTBuilder): # {{{ top-level codegen def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): name = codegen_result.current_program(codegen_state).name from cgen import (FunctionDeclaration, Value) diff --git a/loopy/target/numba.py b/loopy/target/numba.py index 6946063ee..9ec194f32 100644 --- a/loopy/target/numba.py +++ b/loopy/target/numba.py @@ -49,7 +49,7 @@ class NumbaBaseASTBuilder(PythonASTBuilderBase): ]) def get_function_definition(self, codegen_state, codegen_result, - schedule_index, + outline_index, function_decl, function_body): assert function_decl is None diff --git 
a/loopy/target/opencl.py b/loopy/target/opencl.py index e55651c8a..f13f4dea9 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -393,9 +393,9 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): # {{{ top-level codegen def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): fdecl = super(OpenCLCASTBuilder, self).get_function_declaration( - codegen_state, codegen_result, schedule_index) + codegen_state, codegen_result, outline_index) from loopy.target.c import FunctionDeclarationWrapper assert isinstance(fdecl, FunctionDeclarationWrapper) @@ -407,7 +407,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): from loopy.outline import get_insn_ids_for_block_at _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( - codegen_state.kernel.outline, schedule_index)) + codegen_state.kernel.outline, outline_index)) from loopy.symbolic import get_dependencies if not get_dependencies(local_sizes): diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 826ba2a8f..275b43e32 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -630,7 +630,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): # {{{ code generation guts def get_function_definition(self, codegen_state, codegen_result, - schedule_index, function_decl, function_body): + outline_index, function_decl, function_body): from loopy.kernel.data import TemporaryVariable args = ( ["_lpy_cl_kernels", "queue"] @@ -667,7 +667,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): ])) def get_function_declaration(self, codegen_state, codegen_result, - schedule_index): + outline_index): # no such thing in Python return None diff --git a/loopy/target/python.py b/loopy/target/python.py index ce04986d3..64860c43c 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -202,11 +202,11 @@ class PythonASTBuilderBase(ASTBuilderBase): ]) def get_function_declaration(self, 
codegen_state, codegen_result, - schedule_index): + outline_index): return None def get_function_definition(self, codegen_state, codegen_result, - schedule_index, + outline_index, function_decl, function_body): assert function_decl is None @@ -217,7 +217,7 @@ class PythonASTBuilderBase(ASTBuilderBase): [idi.name for idi in codegen_state.implemented_data_info], function_body) - def get_temporary_decls(self, codegen_state, schedule_index): + def get_temporary_decls(self, codegen_state, outline_index): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper -- GitLab From bf38c660d669f9e05886c1517fddabbaa9888c67 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:10:32 -0600 Subject: [PATCH 25/56] renamed schedule_indices->outline_indices --- loopy/codegen/control.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 3d40b3379..27fa54243 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -282,7 +282,7 @@ def build_loop_nest(codegen_state, outline_index): sched_index_info_entries = [ OutlineIndexInfo( - schedule_indices=[i], + outline_indices=[i], admissible_cond_inames=( get_usable_inames_for_conditional(kernel, i)), required_predicates=get_required_predicates(kernel, i), @@ -298,10 +298,10 @@ def build_loop_nest(codegen_state, outline_index): sii.required_predicates, sii.used_inames_within), merge=lambda sii1, sii2: sii1.copy( - schedule_indices=( - sii1.schedule_indices + outline_indices=( + sii1.outline_indices + - sii2.schedule_indices))) + sii2.outline_indices))) # }}} @@ -456,7 +456,7 @@ def build_loop_nest(codegen_state, outline_index): # group only contains starting outline item def gen_code(inner_codegen_state): result = [] - for i in origin_si_entry.schedule_indices: + for i in origin_si_entry.outline_indices: inner = generate_code_for_sched_index( inner_codegen_state, i) -- GitLab From 
f8b2c3c1d117537436aed5dbeab2ff97f65c42ae Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:16:50 -0600 Subject: [PATCH 26/56] renamed KernelState.SCHEDULED->KernelState.OUTLINED --- doc/tutorial.rst | 4 ++-- loopy/auto_test.py | 2 +- loopy/codegen/__init__.py | 2 +- loopy/kernel/__init__.py | 10 +++++----- loopy/kernel/tools.py | 4 ++-- loopy/outline/__init__.py | 16 ++++++++-------- loopy/outline/device_mapping.py | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index fa917ac43..11e248dd3 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1213,7 +1213,7 @@ should call :func:`loopy.get_one_outlined_kernel`: --------------------------------------------------------------------------- ... --------------------------------------------------------------------------- - SCHEDULE: + OUTLINE: 0: CALL KERNEL rotate_v2(extra_args=[], extra_inames=[]) 1: tmp = arr[i_inner + i_outer*16] {id=maketmp} 2: RETURN FROM KERNEL rotate_v2 @@ -1251,7 +1251,7 @@ put those instructions into the outline. --------------------------------------------------------------------------- ... 
--------------------------------------------------------------------------- - SCHEDULE: + OUTLINE: 0: CALL KERNEL rotate_v2(extra_args=['tmp_save_slot'], extra_inames=[]) 1: tmp = arr[i_inner + i_outer*16] {id=maketmp} 2: tmp_save_slot[tmp_save_hw_dim_0_rotate_v2, tmp_save_hw_dim_1_rotate_v2] = tmp {id=tmp.save} diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 248cd5bb0..c6506051e 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -534,7 +534,7 @@ def auto_test_vs_ref( from loopy.target.pyopencl import PyOpenCLTarget if test_knl.state not in [ KernelState.PREPROCESSED, - KernelState.SCHEDULED]: + KernelState.OUTLINED]: if isinstance(test_knl.target, PyOpenCLTarget): test_knl = test_knl.copy(target=PyOpenCLTarget(ctx.devices[0])) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index f5cf1a1f9..54906857f 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -388,7 +388,7 @@ def generate_code_v2(kernel): from loopy.outline import get_one_outlined_kernel kernel = get_one_outlined_kernel(kernel) - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.OUTLINED: raise LoopyError("cannot generate code for a kernel that has not been " "scheduled") diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 5b0e8f4cc..6f75ec169 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -101,7 +101,7 @@ class _UniqueVarNameGenerator(UniqueNameGenerator): class KernelState: # noqa INITIAL = 0 PREPROCESSED = 1 - SCHEDULED = 2 + OUTLINED = 2 # {{{ kernel_state, KernelState compataibility @@ -128,8 +128,8 @@ class kernel_state(object): # noqa return KernelState.PREPROCESSED @_deperecated_kernel_state_class_method - def SCHEDULED(): # pylint:disable=no-method-argument - return KernelState.SCHEDULED + def OUTLINED(): # pylint:disable=no-method-argument + return KernelState.OUTLINED # }}} @@ -332,7 +332,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if state not in [ 
KernelState.INITIAL, KernelState.PREPROCESSED, - KernelState.SCHEDULED, + KernelState.OUTLINED, ]: raise ValueError("invalid value for 'state'") @@ -1344,7 +1344,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if "outline" in what and kernel.outline is not None: lines.extend(sep) if show_labels: - lines.append("SCHEDULE:") + lines.append("OUTLINE:") from loopy.outline import dump_schedule lines.append(dump_schedule(kernel, kernel.outline)) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 77fe38c0c..62b7b53c7 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1726,7 +1726,7 @@ def get_subkernels(kernel): See also :class:`loopy.outline.CallKernel`. """ from loopy.kernel import KernelState - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.OUTLINED: raise LoopyError("Kernel must be scheduled") from loopy.outline import CallKernel @@ -1743,7 +1743,7 @@ def get_subkernel_to_insn_id_map(kernel): kernel must be scheduled. """ from loopy.kernel import KernelState - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.OUTLINED: raise LoopyError("Kernel must be scheduled") from loopy.outline import ( diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index ea6e0834d..cc2548322 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -687,11 +687,11 @@ def generate_loop_outlines_internal( print("KERNEL:") print(kernel.stringify(with_dependencies=True)) print(75*"=") - print("CURRENT SCHEDULE:") + print("CURRENT OUTLINE:") print(dump_schedule(sched_state.kernel, sched_state.outline)) if sched_state.preoutline: print(75*"=") - print("PREOUTLINED ITEMS AWAITING SCHEDULING:") + print("PREOUTLINED ITEMS AWAITING OUTLINING:") print(dump_schedule(sched_state.kernel, sched_state.preoutline)) #print("boost allowed:", allow_boost) print(75*"=") @@ -701,7 +701,7 @@ def generate_loop_outlines_internal( print(75*"=") if debug.debug_length == 
len(debug.longest_rejected_schedule): - print("WHY IS THIS A DEAD-END SCHEDULE?") + print("WHY IS THIS A DEAD-END OUTLINE?") #if len(outline) == 2: #from pudb import set_trace; set_trace() @@ -1847,7 +1847,7 @@ def generate_loop_outlines(kernel, debug_args={}): def generate_loop_outlines_inner(kernel, debug_args={}): from loopy.kernel import KernelState - if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): + if kernel.state not in (KernelState.PREPROCESSED, KernelState.OUTLINED): raise LoopyError("cannot outline a kernel that has not been " "preprocessed") @@ -1859,7 +1859,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): debug = OutlineDebugger(**debug_args) preoutline = kernel.outline if ( - kernel.state == KernelState.SCHEDULED) else () + kernel.state == KernelState.OUTLINED) else () preoutlined_inames = set( insn.iname @@ -1911,7 +1911,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): unoutlined_insn_ids=set(insn.id for insn in kernel.instructions), outlined_insn_ids=frozenset(), - within_subkernel=kernel.state != KernelState.SCHEDULED, + within_subkernel=kernel.state != KernelState.OUTLINED, may_schedule_global_barriers=True, preoutline=preoutline, @@ -1980,11 +1980,11 @@ def generate_loop_outlines_inner(kernel, debug_args={}): new_kernel = kernel.copy( outline=gen_sched, - state=KernelState.SCHEDULED) + state=KernelState.OUTLINED) from loopy.outline.device_mapping import \ map_schedule_onto_host_or_device - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.OUTLINED: # Device mapper only gets run once. new_kernel = map_schedule_onto_host_or_device(new_kernel) diff --git a/loopy/outline/device_mapping.py b/loopy/outline/device_mapping.py index 4eca2409f..19de32afa 100644 --- a/loopy/outline/device_mapping.py +++ b/loopy/outline/device_mapping.py @@ -31,7 +31,7 @@ from loopy.outline.tools import get_block_boundaries def map_schedule_onto_host_or_device(kernel): # FIXME: Should be idempotent. 
from loopy.kernel import KernelState - assert kernel.state == KernelState.SCHEDULED + assert kernel.state == KernelState.OUTLINED from functools import partial device_prog_name_gen = partial( -- GitLab From e75ae235be208b20f90efab997685ec6c4e38f68 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:18:36 -0600 Subject: [PATCH 27/56] pre-schedule->pre-outline --- loopy/check.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 8fbb6842a..35229a931 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -62,7 +62,7 @@ def check_identifiers_in_subst_rules(knl): # }}} -# {{{ sanity checks run pre-scheduling +# {{{ sanity checks run pre-outlining # FIXME: Replace with an enum. See # https://gitlab.tiker.net/inducer/loopy/issues/85 @@ -648,7 +648,7 @@ def check_variable_access_ordered(kernel): def pre_schedule_checks(kernel): try: - logger.debug("%s: pre-schedule check: start" % kernel.name) + logger.debug("%s: pre-outline check: start" % kernel.name) check_for_integer_subscript_indices(kernel) check_for_duplicate_insn_ids(kernel) @@ -665,12 +665,12 @@ def pre_schedule_checks(kernel): check_has_outlinable_iname_nesting(kernel) check_variable_access_ordered(kernel) - logger.debug("%s: pre-schedule check: done" % kernel.name) + logger.debug("%s: pre-outline check: done" % kernel.name) except KeyboardInterrupt: raise except Exception: print(75*"=") - print("failing kernel during pre-schedule check:") + print("failing kernel during pre-outline check:") print(75*"=") print(kernel) print(75*"=") @@ -930,7 +930,7 @@ def pre_codegen_checks(kernel): logger.debug("pre-codegen check %s: done" % kernel.name) except Exception: print(75*"=") - print("failing kernel during pre-schedule check:") + print("failing kernel during pre-outline check:") print(75*"=") print(kernel) print(75*"=") -- GitLab From f93a5eb67f4f01ceb2a88474c667f7664cfa0207 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:20:40 
-0600 Subject: [PATCH 28/56] renamed pre_schedule_checks->pre_outline_checks --- loopy/check.py | 2 +- loopy/outline/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 35229a931..9c33f0aa3 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -646,7 +646,7 @@ def check_variable_access_ordered(kernel): # }}} -def pre_schedule_checks(kernel): +def pre_outline_checks(kernel): try: logger.debug("%s: pre-outline check: start" % kernel.name) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index cc2548322..737c7487e 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1851,8 +1851,8 @@ def generate_loop_outlines_inner(kernel, debug_args={}): raise LoopyError("cannot outline a kernel that has not been " "preprocessed") - from loopy.check import pre_schedule_checks - pre_schedule_checks(kernel) + from loopy.check import pre_outline_checks + pre_outline_checks(kernel) outline_count = 0 -- GitLab From 759efd2b908e56d23f0e3d700601648d2abbc0c1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:22:33 -0600 Subject: [PATCH 29/56] add_extra_args_to_schedule->add_extra_args_to_outline --- loopy/outline/__init__.py | 4 ++-- loopy/outline/tools.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 737c7487e..5780305cd 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1988,8 +1988,8 @@ def generate_loop_outlines_inner(kernel, debug_args={}): # Device mapper only gets run once. 
new_kernel = map_schedule_onto_host_or_device(new_kernel) - from loopy.outline.tools import add_extra_args_to_schedule - new_kernel = add_extra_args_to_schedule(new_kernel) + from loopy.outline.tools import add_extra_args_to_outline + new_kernel = add_extra_args_to_outline(new_kernel) yield new_kernel debug.start() diff --git a/loopy/outline/tools.py b/loopy/outline/tools.py index ea757b837..67360780d 100644 --- a/loopy/outline/tools.py +++ b/loopy/outline/tools.py @@ -72,7 +72,7 @@ def temporaries_written_in_subkernel(kernel, subkernel): # {{{ add extra args to schedule -def add_extra_args_to_schedule(kernel): +def add_extra_args_to_outline(kernel): """ Fill the `extra_args` fields in all the :class:`loopy.outline.CallKernel` instructions in the outline with global temporaries. -- GitLab From 1ed9ea4b344f1ff326f66c1aca6677596fb9c8e6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:36:59 -0600 Subject: [PATCH 30/56] rename schedule->outline in more comments and strings --- examples/python/global_barrier_removal.py | 2 +- loopy/check.py | 8 +-- loopy/codegen/__init__.py | 2 +- loopy/codegen/control.py | 2 +- loopy/kernel/tools.py | 14 ++--- loopy/outline/__init__.py | 64 +++++++++++------------ loopy/outline/tools.py | 2 +- loopy/preprocess.py | 2 +- loopy/target/c/__init__.py | 2 +- loopy/transform/buffer.py | 2 +- loopy/transform/iname.py | 10 ++-- loopy/transform/precompute.py | 2 +- test/test_loopy.py | 10 ++-- 13 files changed, 61 insertions(+), 61 deletions(-) diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index 8f31a59ee..dad6cb882 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -19,7 +19,7 @@ knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "c": np.float32, "out": np.float32, "n": np.int32}) -# schedule +# outline from loopy.preprocess import preprocess_kernel 
knl = preprocess_kernel(knl) diff --git a/loopy/check.py b/loopy/check.py index 9c33f0aa3..b941f7e70 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -677,7 +677,7 @@ def pre_outline_checks(kernel): raise -# {{{ post-schedule / pre-code-generation checks +# {{{ post-outline / pre-code-generation checks # {{{ check for unused hw axes @@ -759,7 +759,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): else: raise TypeError( - "schedule item not understood: %s" % type(sched_item).__name__) + "outline item not understood: %s" % type(sched_item).__name__) return past_end_i @@ -850,7 +850,7 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): # }}} -# {{{ check that all instructions are scheduled +# {{{ check that all instructions are outlined def check_that_all_insns_are_outlined(kernel): @@ -866,7 +866,7 @@ def check_that_all_insns_are_outlined(kernel): if outlined_insns < all_outlinable_insns: from loopy.diagnostic import UnoutlinedInstructionError raise UnoutlinedInstructionError( - "unscheduled instructions: '%s'" + "unoutlined instructions: '%s'" % ', '.join(all_outlinable_insns - outlined_insns)) # }}} diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 54906857f..a39793c11 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -390,7 +390,7 @@ def generate_code_v2(kernel): if kernel.state != KernelState.OUTLINED: raise LoopyError("cannot generate code for a kernel that has not been " - "scheduled") + "outlined") # {{{ cache retrieval diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 27fa54243..39904387a 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -250,7 +250,7 @@ def build_loop_nest(codegen_state, outline_index): if isinstance(sched_item, (EnterLoop, CallKernel)): _, i = gather_schedule_block(kernel.outline, i) assert i <= codegen_state.outline_index_end, \ - "schedule block extends beyond outline_index_end" + "outline block 
extends beyond outline_index_end" elif isinstance(sched_item, Barrier): i += 1 diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 62b7b53c7..741ffd9e2 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -470,7 +470,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): except RuntimeError as e: iname_cluster = False from warnings import warn - warn("error encountered during scheduling for dep graph -- " + warn("error encountered during outlining for dep graph -- " "cannot perform iname clustering: %s(%s)" % (type(e).__name__, e)) @@ -551,7 +551,7 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): elif isinstance(sched_item, (CallKernel, ReturnFromKernel, Barrier)): pass else: - raise LoopyError("schedule item not unterstood: %r" % sched_item) + raise LoopyError("outline item not unterstood: %r" % sched_item) return "digraph %s {\n%s\n}" % ( kernel.name, @@ -1721,13 +1721,13 @@ def find_most_recent_global_barrier(kernel, insn_id): @memoize_on_first_arg def get_subkernels(kernel): """Return a :class:`tuple` of the names of the subkernels in the kernel. The - kernel must be scheduled. + kernel must be outlined. See also :class:`loopy.outline.CallKernel`. """ from loopy.kernel import KernelState if kernel.state != KernelState.OUTLINED: - raise LoopyError("Kernel must be scheduled") + raise LoopyError("Kernel must be outlined") from loopy.outline import CallKernel @@ -1739,12 +1739,12 @@ def get_subkernels(kernel): @memoize_on_first_arg def get_subkernel_to_insn_id_map(kernel): """Return a :class:`dict` mapping subkernel names to a :class:`frozenset` - consisting of the instruction ids scheduled within the subkernel. The - kernel must be scheduled. + consisting of the instruction ids outlined within the subkernel. The + kernel must be outlined. 
""" from loopy.kernel import KernelState if kernel.state != KernelState.OUTLINED: - raise LoopyError("Kernel must be scheduled") + raise LoopyError("Kernel must be outlined") from loopy.outline import ( sched_item_to_insn_id, CallKernel, ReturnFromKernel) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 5780305cd..1a3ec95f5 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -250,7 +250,7 @@ def find_loop_nest_around_map(kernel): if kernel.iname_tags_of_type(outer_iname, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. - # But they're realized by the scheduler as a loop + # But they're realized by the outliner as a loop # at the innermost level, so we'll cut them some # slack here. continue @@ -271,7 +271,7 @@ def find_loop_nest_around_map(kernel): def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): """Returns a dictionary mapping inames to other instruction ids that need to - be scheduled before the iname should be eligible for scheduling. + be outlined before the iname should be eligible for outlining. """ result = {} @@ -515,7 +515,7 @@ class OutlineDebugger: (self.success_counter + self.dead_end_counter) % 50 == 0 and self.elapsed_time() > 10 ): - sys.stdout.write("\rscheduling... %d successes, " + sys.stdout.write("\routlining... %d successes, " "%d dead ends (longest %d)" % ( self.success_counter, self.dead_end_counter, @@ -535,7 +535,7 @@ class OutlineDebugger: def done_scheduling(self): if self.wrote_status: - sys.stdout.write("\rscheduler finished"+40*" "+"\n") + sys.stdout.write("\routliner finished"+40*" "+"\n") sys.stdout.flush() def elapsed_time(self): @@ -561,7 +561,7 @@ class OutlineDebugInput(Exception): # }}} -# {{{ scheduling algorithm +# {{{ outlining algorithm class OutlinerState(ImmutableRecord): """ @@ -582,9 +582,9 @@ class OutlinerState(ImmutableRecord): .. 
attribute:: parallel_inames *Note:* ``ilp`` and ``vec`` are not 'parallel' for the purposes of the - scheduler. See :attr:`ilp_inames`, :attr:`vec_inames`. + outliner. See :attr:`ilp_inames`, :attr:`vec_inames`. - .. rubric:: Time-varying scheduler state + .. rubric:: Time-varying outliner state .. attribute:: active_inames @@ -607,7 +607,7 @@ class OutlinerState(ImmutableRecord): .. attribute:: preoutline A sequence of outline items that must be inserted into the - outline, maintaining the same relative ordering. Newly scheduled + outline, maintaining the same relative ordering. Newly outlined items may interleave this sequence. .. attribute:: preoutlined_insn_ids @@ -620,11 +620,11 @@ class OutlinerState(ImmutableRecord): .. attribute:: may_schedule_global_barriers - Whether global barrier scheduling is allowed + Whether global barrier outlining is allowed .. attribute:: within_subkernel - Whether the scheduler is inside a subkernel + Whether the outliner is inside a subkernel .. attribute:: group_insn_counts @@ -634,7 +634,7 @@ class OutlinerState(ImmutableRecord): .. attribute:: active_group_counts A mapping from instruction group names to the number of instructions - in them that are left to schedule. If a group name occurs in this + in them that are left to outline. If a group name occurs in this mapping, that group is considered active. .. attribute:: uses_of_boostability @@ -746,7 +746,7 @@ def generate_loop_outlines_internal( # Barriers that do not have an originating instruction are handled here. # (These are automatically inserted by insert_barriers().) Barriers with # originating instructions are handled as part of normal instruction - # scheduling below. + # outlining below. 
if ( isinstance(next_preoutline_item, Barrier) and next_preoutline_item.originating_insn_id is None): @@ -761,7 +761,7 @@ def generate_loop_outlines_internal( # }}} - # {{{ see if any insns are ready to be scheduled now + # {{{ see if any insns are ready to be outlined now # Also take note of insns that have a chance of being outlinable inside # the current loop nest, in this set: @@ -797,7 +797,7 @@ def generate_loop_outlines_internal( if not is_ready: if debug_mode: - # These are not that interesting when understanding scheduler + # These are not that interesting when understanding outliner # failures. # print("instruction '%s' is missing insn depedencies '%s'" % ( @@ -829,7 +829,7 @@ def generate_loop_outlines_internal( print("instruction '%s' won't work under inames '%s'" % (format_insn(kernel, insn.id), ",".join(have-want))) - # {{{ check if scheduling this insn is compatible with preoutline + # {{{ check if outlining this insn is compatible with preoutline if insn_id in sched_state.preoutlined_insn_ids: if isinstance(next_preoutline_item, RunInstruction): @@ -848,7 +848,7 @@ def generate_loop_outlines_internal( # }}} - # {{{ check if scheduler state allows insn scheduling + # {{{ check if outliner state allows insn outlining from loopy.kernel.instruction import BarrierInstruction if isinstance(insn, BarrierInstruction) and \ @@ -947,7 +947,7 @@ def generate_loop_outlines_internal( ) # Don't be eager about entering/leaving loops--if progress has been - # made, revert to top of scheduler and see if more progress can be + # made, revert to top of outliner and see if more progress can be # made. for sub_sched in generate_loop_outlines_internal( new_sched_state, @@ -955,7 +955,7 @@ def generate_loop_outlines_internal( yield sub_sched if not sched_state.group_insn_counts: - # No groups: We won't need to backtrack on scheduling + # No groups: We won't need to backtrack on outlining # instructions. 
return @@ -980,7 +980,7 @@ def generate_loop_outlines_internal( can_leave = False elif last_entered_loop not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've - # scheduled all the instructions that require it. + # outlined all the instructions that require it. for insn_id in sched_state.unoutlined_insn_ids: insn = kernel.id_to_insn[insn_id] @@ -1025,7 +1025,7 @@ def generate_loop_outlines_internal( if can_leave: can_leave = False - # We may only leave this loop if we've scheduled an instruction + # We may only leave this loop if we've outlined an instruction # since entering it. seen_an_insn = False @@ -1067,7 +1067,7 @@ def generate_loop_outlines_internal( # {{{ see if any loop can be entered now - # Find inames that are being referenced by as yet unscheduled instructions. + # Find inames that are being referenced by as yet unoutlined instructions. needed_inames = set() for insn_id in sched_state.unoutlined_insn_ids: needed_inames.update(kernel.insn_inames(insn_id)) @@ -1095,7 +1095,7 @@ def generate_loop_outlines_internal( for iname in needed_inames: - # {{{ check if scheduling this iname now is allowed/plausible + # {{{ check if outlining this iname now is allowed/plausible if ( iname in sched_state.preoutlined_inames @@ -1103,7 +1103,7 @@ def generate_loop_outlines_internal( isinstance(next_preoutline_item, EnterLoop) and next_preoutline_item.iname == iname)): if debug_mode: - print("scheduling %s prohibited by preoutline constraints" + print("outlining %s prohibited by preoutline constraints" % iname) continue @@ -1113,7 +1113,7 @@ def generate_loop_outlines_internal( not sched_state.loop_nest_around_map[iname] <= currently_accessible_inames): if debug_mode: - print("scheduling %s prohibited by loop nest-around map" % iname) + print("outlining %s prohibited by loop nest-around map" % iname) continue if ( @@ -1121,7 +1121,7 @@ def generate_loop_outlines_internal( <= sched_state.outlined_insn_ids): if debug_mode: print( - 
"scheduling {iname} prohibited by loop dependency map " + "outlining {iname} prohibited by loop dependency map " "(needs '{needed_insns})'" .format( iname=iname, @@ -1144,7 +1144,7 @@ def generate_loop_outlines_internal( <= currently_accessible_inames) # Check if any parameters are temporary variables, and if so, if their - # writes have already been scheduled. + # writes have already been outlined. data_dep_written = True for domain_par in ( @@ -1155,7 +1155,7 @@ def generate_loop_outlines_internal( if writer_insn not in sched_state.outlined_insn_ids: data_dep_written = False if debug_mode: - print("iname '%s' not scheduled because domain " + print("iname '%s' not outlined because domain " "parameter '%s' is not yet available" % (iname, domain_par)) break @@ -1686,7 +1686,7 @@ def insert_barriers( :arg level: the current level of loop nesting, 0 for outermost. """ - # {{{ insert barriers at outermost scheduling level + # {{{ insert barriers at outermost outlining level def insert_barriers_at_outer_level(outline, reverse=False): dep_tracker = DependencyTracker(kernel, var_kind=synchronization_kind, @@ -1826,7 +1826,7 @@ class MinRecursionLimitForOutlining(MinRecursionLimit): len(kernel.instructions) * 2 + len(kernel.all_inames()) * 4) -# {{{ main scheduling entrypoint +# {{{ main outlining entrypoint def generate_loop_outlines(kernel, debug_args={}): """ @@ -1917,7 +1917,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): preoutline=preoutline, insn_ids_to_try=None, - # ilp and vec are not parallel for the purposes of the scheduler + # ilp and vec are not parallel for the purposes of the outliner parallel_inames=parallel_inames - ilp_inames - vec_inames, group_insn_counts=group_insn_counts(kernel), @@ -1931,7 +1931,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): def print_longest_dead_end(): if debug.interactive: - print("Loo.py will now show you the scheduler state at the point") + print("Loo.py will now show you the outliner state at the 
point") print("where the longest (dead-end) outline was generated, in the") print("the hope that some of this makes sense and helps you find") print("the issue.") @@ -1999,7 +1999,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): except KeyboardInterrupt: print() print(75*"-") - print("Interrupted during scheduling") + print("Interrupted during outlining") print(75*"-") print_longest_dead_end() raise diff --git a/loopy/outline/tools.py b/loopy/outline/tools.py index 67360780d..e0e69a9b3 100644 --- a/loopy/outline/tools.py +++ b/loopy/outline/tools.py @@ -70,7 +70,7 @@ def temporaries_written_in_subkernel(kernel, subkernel): # }}} -# {{{ add extra args to schedule +# {{{ add extra args to outline def add_extra_args_to_outline(kernel): """ diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 23c4b7fbd..7e24685fe 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -122,7 +122,7 @@ def check_reduction_iname_uniqueness(kernel): if nonsimul_count and count > 1: raise LoopyError("iname '%s' used in more than one reduction. " "(%d of them, to be precise.) " - "Since this usage can easily cause loop scheduling " + "Since this usage can easily cause loop outlining " "problems, this is prohibited by default. " "Use loopy.make_reduction_inames_unique() to fix this. " "If you are sure that this is OK, write the reduction " diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 1a146e01c..81ca9688e 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -512,7 +512,7 @@ class CFamilyASTBuilder(ASTBuilderBase): from loopy.outline import CallKernel # We only need to write declarations for global variables with # the first device program. `is_first_dev_prog` determines - # whether this is the first device program in the schedule. + # whether this is the first device program in the outline. 
is_first_dev_prog = codegen_state.is_generating_device_code for i in range(outline_index): if isinstance(kernel.outline[i], CallKernel): diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index 7f4779cc7..b9fc1b3a8 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -328,7 +328,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, if domch.leaf_domain_index is not None: # If the sweep inames are at home in parent domains, then we'll add # fetches with loops over copies of these parent inames that will end - # up being scheduled *within* loops over these parents. + # up being outlined *within* loops over these parents. for iname in buffer_inames_set: if kernel.get_home_domain_index(iname) != domch.leaf_domain_index: diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 0ca73d6c0..afb7ea6c2 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -98,7 +98,7 @@ def prioritize_loops(kernel, loop_priority): Priority is only considered if loop nesting is ambiguous. prioritize_loops can be used multiple times. If you do so, each given - *loop_priority* specifies a scheduling constraint. The constraints from + *loop_priority* specifies a outlining constraint. The constraints from all calls to prioritize_loops together establish a partial order on the inames (see https://en.wikipedia.org/wiki/Partially_ordered_set). 
@@ -912,7 +912,7 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # }}} -# {{{ iname duplication for schedulability +# {{{ iname duplication for outlinability def _get_iname_duplication_options(insn_iname_sets, old_common_inames=frozenset([])): # Remove common inames of the current insn_iname_sets, as they are not relevant @@ -987,17 +987,17 @@ def _get_iname_duplication_options(insn_iname_sets, old_common_inames=frozenset( def get_iname_duplication_options(knl, use_boostable_into=False): - """List options for duplication of inames, if necessary for schedulability + """List options for duplication of inames, if necessary for outlinability :returns: a generator listing all options to duplicate inames, if duplication - of an iname is necessary to ensure the schedulability of the kernel. + of an iname is necessary to ensure the outlinability of the kernel. Duplication options are returned as tuples (iname, within) as understood by :func:`duplicate_inames`. There is no guarantee, that the transformed kernel will be outlinable, because multiple duplications of iname may be necessary. Some kernels require the duplication of inames in order to be outlinable, as the - forced iname dependencies define an over-determined problem to the scheduler. + forced iname dependencies define an over-determined problem to the outliner. 
Consider the following minimal example: knl = lp.make_kernel(["{[i,j]:0<=i,j Date: Mon, 24 Feb 2020 07:39:35 -0600 Subject: [PATCH 31/56] gather_schedule_block->gather_outline_block --- loopy/check.py | 4 ++-- loopy/codegen/control.py | 8 ++++---- loopy/outline/__init__.py | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index b941f7e70..cba8e87e4 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -684,7 +684,7 @@ def pre_outline_checks(kernel): def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): from loopy.outline import (CallKernel, RunInstruction, Barrier, EnterLoop, LeaveLoop, ReturnFromKernel, - get_insn_ids_for_block_at, gather_schedule_block) + get_insn_ids_for_block_at, gather_outline_block) if sched_index is None: group_axes = set() @@ -694,7 +694,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): loop_end_i = past_end_i = len(kernel.outline) else: assert isinstance(kernel.outline[sched_index], CallKernel) - _, past_end_i = gather_schedule_block(kernel.outline, sched_index) + _, past_end_i = gather_outline_block(kernel.outline, sched_index) group_size, local_size = kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at(kernel.outline, sched_index)) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 39904387a..237a88fdb 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -28,7 +28,7 @@ from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl from loopy.outline import ( EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, - gather_schedule_block, generate_sub_sched_items) + gather_outline_block, generate_sub_sched_items) from loopy.diagnostic import LoopyError @@ -72,8 +72,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): if isinstance(sched_item, CallKernel): assert not codegen_state.is_generating_device_code - from loopy.outline import 
(gather_schedule_block, get_insn_ids_for_block_at) - _, past_end_i = gather_schedule_block(kernel.outline, sched_index) + from loopy.outline import (gather_outline_block, get_insn_ids_for_block_at) + _, past_end_i = gather_outline_block(kernel.outline, sched_index) assert past_end_i <= codegen_state.outline_index_end extra_args = synthesize_idis_for_extra_args(kernel, sched_index) @@ -248,7 +248,7 @@ def build_loop_nest(codegen_state, outline_index): my_sched_indices.append(i) if isinstance(sched_item, (EnterLoop, CallKernel)): - _, i = gather_schedule_block(kernel.outline, i) + _, i = gather_outline_block(kernel.outline, i) assert i <= codegen_state.outline_index_end, \ "outline block extends beyond outline_index_end" diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 1a3ec95f5..b7a87dad1 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -105,7 +105,7 @@ class Barrier(OutlineItem): # {{{ outline utilities -def gather_schedule_block(outline, start_idx): +def gather_outline_block(outline, start_idx): assert isinstance(outline[start_idx], BeginBlockItem) level = 0 @@ -174,7 +174,7 @@ def has_barrier_within(kernel, sched_index): sched_item = kernel.outline[sched_index] if isinstance(sched_item, BeginBlockItem): - loop_contents, _ = gather_schedule_block( + loop_contents, _ = gather_outline_block( kernel.outline, sched_index) from pytools import any return any(isinstance(subsched_item, Barrier) @@ -189,7 +189,7 @@ def find_used_inames_within(kernel, sched_index): sched_item = kernel.outline[sched_index] if isinstance(sched_item, BeginBlockItem): - loop_contents, _ = gather_schedule_block( + loop_contents, _ = gather_outline_block( kernel.outline, sched_index) run_insns = [subsched_item for subsched_item in loop_contents @@ -1707,7 +1707,7 @@ def insert_barriers( sched_item = outline[i] if isinstance(sched_item, EnterLoop): - subloop, new_i = gather_schedule_block(outline, i) + subloop, new_i = gather_outline_block(outline, 
i) loop_head = ( insn_ids_reachable_from_start_without_intervening_barrier( @@ -1789,7 +1789,7 @@ def insert_barriers( sched_item = outline[i] if isinstance(sched_item, EnterLoop): - subloop, new_i = gather_schedule_block(outline, i) + subloop, new_i = gather_outline_block(outline, i) new_subloop = insert_barriers( kernel, subloop[1:-1], synchronization_kind, verify_only, level + 1) -- GitLab From 23c976679a5d60a907fbb91c199f06d81d4d142a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:41:16 -0600 Subject: [PATCH 32/56] renamed done_scheduling->done_outlining --- loopy/outline/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index b7a87dad1..862c55e3b 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -533,7 +533,7 @@ class OutlineDebugger: self.dead_end_counter += 1 self.update() - def done_scheduling(self): + def done_outlining(self): if self.wrote_status: sys.stdout.write("\routliner finished"+40*" "+"\n") sys.stdout.flush() @@ -2004,7 +2004,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): print_longest_dead_end() raise - debug.done_scheduling() + debug.done_outlining() if not outline_count: print(75*"-") print("ERROR: Sorry--loo.py did not find an outline for your kernel.") -- GitLab From f751ae4292ace8af96b0d9c567a37c23ef75655b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:43:05 -0600 Subject: [PATCH 33/56] renamed dump_schedule->dump_outline --- loopy/kernel/__init__.py | 4 ++-- loopy/outline/__init__.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 6f75ec169..f85f42f49 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1345,8 +1345,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): lines.extend(sep) if show_labels: lines.append("OUTLINE:") - from loopy.outline import dump_schedule - 
lines.append(dump_schedule(kernel, kernel.outline)) + from loopy.outline import dump_outline + lines.append(dump_outline(kernel, kernel.outline)) lines.extend(sep) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 862c55e3b..77774ca84 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -456,7 +456,7 @@ def format_insn(kernel, insn_id): Fore.CYAN, str(insn), Style.RESET_ALL) -def dump_schedule(kernel, outline): +def dump_outline(kernel, outline): lines = [] indent = "" @@ -688,11 +688,11 @@ def generate_loop_outlines_internal( print(kernel.stringify(with_dependencies=True)) print(75*"=") print("CURRENT OUTLINE:") - print(dump_schedule(sched_state.kernel, sched_state.outline)) + print(dump_outline(sched_state.kernel, sched_state.outline)) if sched_state.preoutline: print(75*"=") print("PREOUTLINED ITEMS AWAITING OUTLINING:") - print(dump_schedule(sched_state.kernel, sched_state.preoutline)) + print(dump_outline(sched_state.kernel, sched_state.preoutline)) #print("boost allowed:", allow_boost) print(75*"=") print("LOOP NEST MAP (inner: outer):") -- GitLab From eb935a51bae892d25f59b93bec4f7eb0d89f9502 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:45:05 -0600 Subject: [PATCH 34/56] renamed map_schedule_onto_host_or_device->map_outline_onto_host_or_device --- loopy/outline/__init__.py | 4 ++-- loopy/outline/device_mapping.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 77774ca84..24a627143 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1983,10 +1983,10 @@ def generate_loop_outlines_inner(kernel, debug_args={}): state=KernelState.OUTLINED) from loopy.outline.device_mapping import \ - map_schedule_onto_host_or_device + map_outline_onto_host_or_device if kernel.state != KernelState.OUTLINED: # Device mapper only gets run once. 
- new_kernel = map_schedule_onto_host_or_device(new_kernel) + new_kernel = map_outline_onto_host_or_device(new_kernel) from loopy.outline.tools import add_extra_args_to_outline new_kernel = add_extra_args_to_outline(new_kernel) diff --git a/loopy/outline/device_mapping.py b/loopy/outline/device_mapping.py index 19de32afa..cd8288f5e 100644 --- a/loopy/outline/device_mapping.py +++ b/loopy/outline/device_mapping.py @@ -28,7 +28,7 @@ from loopy.outline import (Barrier, CallKernel, EnterLoop, LeaveLoop, from loopy.outline.tools import get_block_boundaries -def map_schedule_onto_host_or_device(kernel): +def map_outline_onto_host_or_device(kernel): # FIXME: Should be idempotent. from loopy.kernel import KernelState assert kernel.state == KernelState.OUTLINED @@ -49,13 +49,13 @@ def map_schedule_onto_host_or_device(kernel): [ReturnFromKernel(kernel_name=kernel.name)]) kernel = kernel.copy(outline=new_outline) else: - kernel = map_schedule_onto_host_or_device_impl( + kernel = map_outline_onto_host_or_device_impl( kernel, device_prog_name_gen) return kernel -def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): +def map_outline_onto_host_or_device_impl(kernel, device_prog_name_gen): outline = kernel.outline loop_bounds = get_block_boundaries(outline) -- GitLab From d2e37c776d67f2ac94f657ffff4861538feb73d2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:46:55 -0600 Subject: [PATCH 35/56] renamed get_typed_and_scheduled_kernel->get_typed_and_outlined_kernel --- loopy/target/c/c_execution.py | 2 +- loopy/target/execution.py | 8 ++++---- loopy/target/pyopencl_execution.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 698507978..91182b55d 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -396,7 +396,7 @@ class CKernelExecutor(KernelExecutorBase): @memoize_method def kernel_info(self, arg_to_dtype_set=frozenset(), 
all_kwargs=None): - kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set) + kernel = self.get_typed_and_outlined_kernel(arg_to_dtype_set) from loopy.codegen import generate_code_v2 codegen_result = generate_code_v2(kernel) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index b540f4f58..1ada5b212 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -729,7 +729,7 @@ class KernelExecutorBase(object): arg.dtype is None for arg in kernel.args) - def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set): + def get_typed_and_outlined_kernel_uncached(self, arg_to_dtype_set): from loopy.kernel.tools import add_dtypes kernel = self.kernel @@ -763,7 +763,7 @@ class KernelExecutorBase(object): return kernel - def get_typed_and_scheduled_kernel(self, arg_to_dtype_set): + def get_typed_and_outlined_kernel(self, arg_to_dtype_set): from loopy import CACHING_ENABLED from loopy.preprocess import prepare_for_caching @@ -780,7 +780,7 @@ class KernelExecutorBase(object): logger.debug("%s: typed-and-scheduled cache miss" % self.kernel.name) - kernel = self.get_typed_and_scheduled_kernel_uncached(arg_to_dtype_set) + kernel = self.get_typed_and_outlined_kernel_uncached(arg_to_dtype_set) if CACHING_ENABLED: typed_and_scheduled_cache.store_if_not_present(cache_key, kernel) @@ -831,7 +831,7 @@ class KernelExecutorBase(object): arg_to_dtype = frozenset( (k, process_dtype(v)) for k, v in six.iteritems(arg_to_dtype)) - kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype) + kernel = self.get_typed_and_outlined_kernel(arg_to_dtype) from loopy.codegen import generate_code_v2 code = generate_code_v2(kernel) diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 05fdd21f0..39c8fc734 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -276,7 +276,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): @memoize_method def kernel_info(self, 
arg_to_dtype_set=frozenset(), all_kwargs=None): - kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set) + kernel = self.get_typed_and_outlined_kernel(arg_to_dtype_set) from loopy.codegen import generate_code_v2 from loopy.target.execution import get_highlighted_code -- GitLab From 376f29f32c60290067a84a99b607c53fe001dd67 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:49:39 -0600 Subject: [PATCH 36/56] changed loop-scheduled->loop-outlined in docs --- loopy/target/c/c_execution.py | 2 +- loopy/target/pyopencl_execution.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 91182b55d..050b30451 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -383,7 +383,7 @@ class CKernelExecutor(KernelExecutorBase): """ :arg kernel: may be a loopy.LoopKernel, a generator returning kernels (a warning will be issued if more than one is returned). If the - kernel has not yet been loop-scheduled, that is done, too, with no + kernel has not yet been loop-outlined, that is done, too, with no specific arguments. """ diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 39c8fc734..b4a887a69 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -257,7 +257,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): :arg context: a :class:`pyopencl.Context` :arg kernel: may be a loopy.LoopKernel, a generator returning kernels (a warning will be issued if more than one is returned). If the - kernel has not yet been loop-scheduled, that is done, too, with no + kernel has not yet been loop-outlined, that is done, too, with no specific arguments. 
""" -- GitLab From e1a90b46ca04e0328ece852ca694c578fdabe8e2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:52:11 -0600 Subject: [PATCH 37/56] changed schedule->outline in docs --- doc/misc.rst | 6 +++--- doc/ref_kernel.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/misc.rst b/doc/misc.rst index 62e5a1fa2..d6347ae0d 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -401,7 +401,7 @@ potentially valuable guarantee to keep existing code working unchanged for a while. Instead, it might be wiser to just grab the version of the language current at the time of writing the code. -Uh-oh. I got a scheduling error. Any hints? +Uh-oh. I got an outlining error. Any hints? ------------------------------------------- * Make sure that dependencies between instructions are as @@ -412,7 +412,7 @@ Uh-oh. I got a scheduling error. Any hints? There's a heuristic that tries to help find dependencies. If there's only a single write to a variable, then it adds dependencies from all readers to the writer. In your case, that's actually counterproductive, - because it creates a circular dependency, hence the scheduling issue. + because it creates a circular dependency, hence the outlining issue. So you'll have to turn that off, like so:: knl = lp.make_kernel( @@ -435,7 +435,7 @@ Uh-oh. I got a scheduling error. Any hints? * Make sure that your loops are correctly nested. - The scheduler will try to be as helpful as it can in telling + The outliner will try to be as helpful as it can in telling you where it got stuck. Citing Loopy diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 7662bf440..d2f4dac4d 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -297,7 +297,7 @@ These are usually key-value pairs. The following attributes are recognized: accepts an optional `@scope` suffix. * ``priority=integer`` sets the instructions priority to the value - ``integer``. 
Instructions with higher priority will be scheduled sooner, + ``integer``. Instructions with higher priority will be outlined sooner, if possible. Note that the outliner may still outline a lower-priority instruction ahead of a higher-priority one if loop orders or dependencies require it. -- GitLab From 6449440a77918e6c6a0a046ab1fd88bc457d097b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:56:24 -0600 Subject: [PATCH 38/56] changed schedule->outline in more strings/comments --- loopy/auto_test.py | 2 +- loopy/kernel/__init__.py | 6 +++--- loopy/kernel/instruction.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index c6506051e..111dac092 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -384,7 +384,7 @@ def auto_test_vs_ref( max_test_kernel_count=1, quiet=False, blacklist_ref_vendors=[]): """Compare results of `ref_knl` to the kernels generated by - scheduling *test_knl*. + outlining *test_knl*. :arg check_result: a callable with :class:`numpy.ndarray` arguments *(result, reference_result)* returning a a tuple (class:`bool`, diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index f85f42f49..be778e06b 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -199,9 +199,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: loop_priority A frozenset of priority constraints to the kernel. Each such constraint - is a tuple of inames. Inames occuring in such a tuple will be scheduled - earlier than any iname following in the tuple. This applies only to inames - with non-parallel implementation tags. + is a tuple of inames. Inames occuring in such a tuple will be added to + outline earlier than any iname following in the tuple. This applies only + to inames with non-parallel implementation tags. .. 
attribute:: silenced_warnings diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index fff9896d6..e81c6889e 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -81,7 +81,7 @@ class InstructionBase(ImmutableRecord): A :class:`frozenset` of strings indicating which instruction groups (see :class:`InstructionBase.groups`) may not be active when this - instruction is scheduled. + instruction is outlined. .. attribute:: priority -- GitLab From 202074ea277a603ca1f195a6bb72357cf1a22a1c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 07:57:38 -0600 Subject: [PATCH 39/56] renamed schedule_state->outline_state --- loopy/target/pyopencl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 275b43e32..6b69e3f13 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -671,7 +671,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): # no such thing in Python return None - def get_temporary_decls(self, codegen_state, schedule_state): + def get_temporary_decls(self, codegen_state, outline_state): from genpy import Assign, Comment, Line def alloc_nbytes(tv): -- GitLab From 54ba1a4a67e23a432ecdc0ed75f6e000cd8a3658 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:01:08 -0600 Subject: [PATCH 40/56] renamed outline_cache->schedule_cache --- loopy/outline/__init__.py | 8 ++++---- loopy/target/execution.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 24a627143..bb9976246 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -2017,8 +2017,8 @@ def generate_loop_outlines_inner(kernel, debug_args={}): # }}} -schedule_cache = WriteOncePersistentDict( - "loopy-schedule-cache-v4-"+DATA_MODEL_VERSION, +outline_cache = WriteOncePersistentDict( + "loopy-outline-cache-v4-"+DATA_MODEL_VERSION, 
key_builder=LoopyKeyBuilder()) @@ -2052,7 +2052,7 @@ def get_one_outlined_kernel(kernel): if CACHING_ENABLED: try: - result = schedule_cache[sched_cache_key] + result = outline_cache[sched_cache_key] logger.debug("%s: outline cache hit" % kernel.name) from_cache = True @@ -2065,7 +2065,7 @@ def get_one_outlined_kernel(kernel): result = _get_one_outlined_kernel_inner(kernel) if CACHING_ENABLED and not from_cache: - schedule_cache.store_if_not_present(sched_cache_key, result) + outline_cache.store_if_not_present(sched_cache_key, result) return result diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 1ada5b212..4444af374 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -693,8 +693,8 @@ class _Kernels(object): pass -typed_and_scheduled_cache = WriteOncePersistentDict( - "loopy-typed-and-scheduled-cache-v1-"+DATA_MODEL_VERSION, +typed_and_outlined_cache = WriteOncePersistentDict( + "loopy-typed-and-outlined-cache-v1-"+DATA_MODEL_VERSION, key_builder=LoopyKeyBuilder()) @@ -774,16 +774,16 @@ class KernelExecutorBase(object): if CACHING_ENABLED: try: - return typed_and_scheduled_cache[cache_key] + return typed_and_outlined_cache[cache_key] except KeyError: pass - logger.debug("%s: typed-and-scheduled cache miss" % self.kernel.name) + logger.debug("%s: typed-and-outlined cache miss" % self.kernel.name) kernel = self.get_typed_and_outlined_kernel_uncached(arg_to_dtype_set) if CACHING_ENABLED: - typed_and_scheduled_cache.store_if_not_present(cache_key, kernel) + typed_and_outlined_cache.store_if_not_present(cache_key, kernel) return kernel -- GitLab From 077ba3796cf6920db1b6fae609b0360dc3254e5f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:10:05 -0600 Subject: [PATCH 41/56] rename longest_rejected_schedule->longest_rejected_outline --- loopy/outline/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 
bb9976246..fdae8bb0e 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -498,7 +498,7 @@ def dump_outline(kernel, outline): class OutlineDebugger: def __init__(self, debug_length=None, interactive=True): - self.longest_rejected_schedule = [] + self.longest_rejected_outline = [] self.success_counter = 0 self.dead_end_counter = 0 self.debug_length = debug_length @@ -519,7 +519,7 @@ class OutlineDebugger: "%d dead ends (longest %d)" % ( self.success_counter, self.dead_end_counter, - len(self.longest_rejected_schedule))) + len(self.longest_rejected_outline))) sys.stdout.flush() self.wrote_status = 2 @@ -528,8 +528,8 @@ class OutlineDebugger: self.update() def log_dead_end(self, outline): - if len(outline) > len(self.longest_rejected_schedule): - self.longest_rejected_schedule = outline + if len(outline) > len(self.longest_rejected_outline): + self.longest_rejected_outline = outline self.dead_end_counter += 1 self.update() @@ -700,7 +700,7 @@ def generate_loop_outlines_internal( print("%s : %s" % (iname, ", ".join(val))) print(75*"=") - if debug.debug_length == len(debug.longest_rejected_schedule): + if debug.debug_length == len(debug.longest_rejected_outline): print("WHY IS THIS A DEAD-END OUTLINE?") #if len(outline) == 2: @@ -1944,7 +1944,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): print() print() - debug.debug_length = len(debug.longest_rejected_schedule) + debug.debug_length = len(debug.longest_rejected_outline) while True: try: for _ in generate_loop_outlines_internal( -- GitLab From 2d1876605f266b4920445776fba84df9e3070c1b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:11:09 -0600 Subject: [PATCH 42/56] rename may_schedule_global_barriers->may_outline_global_barriers --- loopy/outline/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index fdae8bb0e..b140c0e23 100644 --- a/loopy/outline/__init__.py +++ 
b/loopy/outline/__init__.py @@ -618,7 +618,7 @@ class OutlinerState(ImmutableRecord): A :class:`frozenset` of any iname that started preoutlined - .. attribute:: may_schedule_global_barriers + .. attribute:: may_outline_global_barriers Whether global barrier outlining is allowed @@ -718,7 +718,7 @@ def generate_loop_outlines_internal( next_preoutline_item,), preoutline=sched_state.preoutline[1:], within_subkernel=True, - may_schedule_global_barriers=False, + may_outline_global_barriers=False, enclosing_subkernel_inames=sched_state.active_inames), allow_boost=rec_allow_boost, debug=debug): @@ -734,7 +734,7 @@ def generate_loop_outlines_internal( next_preoutline_item,), preoutline=sched_state.preoutline[1:], within_subkernel=False, - may_schedule_global_barriers=True), + may_outline_global_barriers=True), allow_boost=rec_allow_boost, debug=debug): yield result @@ -853,7 +853,7 @@ def generate_loop_outlines_internal( from loopy.kernel.instruction import BarrierInstruction if isinstance(insn, BarrierInstruction) and \ insn.synchronization_kind == "global": - if not sched_state.may_schedule_global_barriers: + if not sched_state.may_outline_global_barriers: if debug_mode: print("can't outline '%s' because global barriers are " "not currently allowed" % format_insn(kernel, insn.id)) @@ -1912,7 +1912,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): unoutlined_insn_ids=set(insn.id for insn in kernel.instructions), outlined_insn_ids=frozenset(), within_subkernel=kernel.state != KernelState.OUTLINED, - may_schedule_global_barriers=True, + may_outline_global_barriers=True, preoutline=preoutline, insn_ids_to_try=None, -- GitLab From a361fea7d7b0c881d54a03c87db5d155fb0c31b9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:13:07 -0600 Subject: [PATCH 43/56] rename schedule_gen_kwargs->outline_gen_kwargs --- loopy/outline/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/outline/__init__.py 
b/loopy/outline/__init__.py index b140c0e23..818ae060d 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1925,9 +1925,9 @@ def generate_loop_outlines_inner(kernel, debug_args={}): uses_of_boostability=[]) - schedule_gen_kwargs = {} + outline_gen_kwargs = {} if kernel.options.ignore_boostable_into: - schedule_gen_kwargs["allow_boost"] = None + outline_gen_kwargs["allow_boost"] = None def print_longest_dead_end(): if debug.interactive: @@ -1948,7 +1948,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): while True: try: for _ in generate_loop_outlines_internal( - sched_state, debug=debug, **schedule_gen_kwargs): + sched_state, debug=debug, **outline_gen_kwargs): pass except OutlineDebugInput as e: @@ -1959,7 +1959,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): try: for gen_sched in generate_loop_outlines_internal( - sched_state, debug=debug, **schedule_gen_kwargs): + sched_state, debug=debug, **outline_gen_kwargs): debug.stop() gen_sched = convert_barrier_instructions_to_barriers( -- GitLab From c2b1806ebf61d529075aa4e3a264865988df34eb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:14:20 -0600 Subject: [PATCH 44/56] rename found_viable_schedule->found_viable_outline --- loopy/outline/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 818ae060d..759bbe84b 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1249,7 +1249,7 @@ def generate_loop_outlines_internal( print("useful inames: %s" % ",".join(useful_loops_set)) else: for tier in priority_tiers: - found_viable_schedule = False + found_viable_outline = False for iname in sorted(tier, key=lambda iname: ( @@ -1276,10 +1276,10 @@ def generate_loop_outlines_internal( ), allow_boost=rec_allow_boost, debug=debug): - found_viable_schedule = True + found_viable_outline = True yield sub_sched - if found_viable_schedule: + if 
found_viable_outline: return # }}} -- GitLab From 4f2b379282c53b121fe9e98c101a43a11f167761 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:42:51 -0600 Subject: [PATCH 45/56] renamed sched_item->outline_item --- loopy/check.py | 18 ++-- loopy/codegen/bounds.py | 8 +- loopy/codegen/control.py | 66 ++++++------- loopy/kernel/tools.py | 34 +++---- loopy/outline/__init__.py | 170 ++++++++++++++++---------------- loopy/outline/device_mapping.py | 34 +++---- loopy/outline/tools.py | 20 ++-- loopy/statistics.py | 22 ++--- loopy/transform/save.py | 54 +++++----- test/test_loopy.py | 18 ++-- 10 files changed, 222 insertions(+), 222 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index cba8e87e4..e81fbbfed 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -711,12 +711,12 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): GroupIndexTag) while i < loop_end_i: - sched_item = kernel.outline[i] - if isinstance(sched_item, CallKernel): + outline_item = kernel.outline[i] + if isinstance(outline_item, CallKernel): i = _check_for_unused_hw_axes_in_kernel_chunk(kernel, i) - elif isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] + elif isinstance(outline_item, RunInstruction): + insn = kernel.id_to_insn[outline_item.insn_id] i += 1 if insn.boostable: @@ -753,13 +753,13 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): ",".join(str(i) for i in local_axes), ",".join(str(i) for i in local_axes_used))) - elif isinstance(sched_item, (Barrier, EnterLoop, LeaveLoop)): + elif isinstance(outline_item, (Barrier, EnterLoop, LeaveLoop)): i += 1 continue else: raise TypeError( - "outline item not understood: %s" % type(sched_item).__name__) + "outline item not understood: %s" % type(outline_item).__name__) return past_end_i @@ -855,11 +855,11 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): def check_that_all_insns_are_outlined(kernel): all_outlinable_insns 
= set(insn.id for insn in kernel.instructions) - from loopy.outline import sched_item_to_insn_id + from loopy.outline import outline_item_to_insn_id outlined_insns = set( insn_id - for sched_item in kernel.outline - for insn_id in sched_item_to_insn_id(sched_item)) + for outline_item in kernel.outline + for insn_id in outline_item_to_insn_id(outline_item)) assert outlined_insns <= all_outlinable_insns diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index d364948cd..c1d05a2ea 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -68,13 +68,13 @@ def get_usable_inames_for_conditional(kernel, sched_index): # Find our containing subkernel. Grab inames for all insns from there. within_subkernel = False - for sched_item_index, sched_item in enumerate( + for outline_item_index, outline_item in enumerate( kernel.outline[:sched_index]): from loopy.outline import CallKernel, ReturnFromKernel - if isinstance(sched_item, CallKernel): + if isinstance(outline_item, CallKernel): within_subkernel = True - subkernel_index = sched_item_index - elif isinstance(sched_item, ReturnFromKernel): + subkernel_index = outline_item_index + elif isinstance(outline_item, ReturnFromKernel): within_subkernel = False if not within_subkernel: diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 237a88fdb..18704da96 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -28,7 +28,7 @@ from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl from loopy.outline import ( EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, - gather_outline_block, generate_sub_sched_items) + gather_outline_block, generate_sub_outline_items) from loopy.diagnostic import LoopyError @@ -36,16 +36,16 @@ def synthesize_idis_for_extra_args(kernel, outline_index): """ :returns: A list of :class:`loopy.codegen.ImplementedDataInfo` """ - sched_item = kernel.outline[outline_index] + outline_item = kernel.outline[outline_index] 
from loopy.codegen import ImplementedDataInfo from loopy.kernel.data import InameArg, AddressSpace - assert isinstance(sched_item, CallKernel) + assert isinstance(outline_item, CallKernel) idis = [] - for arg in sched_item.extra_args: + for arg in outline_item.extra_args: temporary = kernel.temporary_variables[arg] assert temporary.address_space == AddressSpace.GLOBAL idis.extend( @@ -53,7 +53,7 @@ def synthesize_idis_for_extra_args(kernel, outline_index): kernel.target, index_dtype=kernel.index_dtype)) - for iname in sched_item.extra_inames: + for iname in outline_item.extra_inames: idis.append( ImplementedDataInfo( target=kernel.target, @@ -67,9 +67,9 @@ def synthesize_idis_for_extra_args(kernel, outline_index): def generate_code_for_sched_index(codegen_state, sched_index): kernel = codegen_state.kernel - sched_item = kernel.outline[sched_index] + outline_item = kernel.outline[sched_index] - if isinstance(sched_item, CallKernel): + if isinstance(outline_item, CallKernel): assert not codegen_state.is_generating_device_code from loopy.outline import (gather_outline_block, get_insn_ids_for_block_at) @@ -80,7 +80,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): new_codegen_state = codegen_state.copy( is_generating_device_code=True, - gen_program_name=sched_item.kernel_name, + gen_program_name=outline_item.kernel_name, outline_index_end=past_end_i-1, implemented_data_info=(codegen_state.implemented_data_info + extra_args)) @@ -97,13 +97,13 @@ def generate_code_for_sched_index(codegen_state, sched_index): codegen_state.ast_builder.get_kernel_call( codegen_state, - sched_item.kernel_name, + outline_item.kernel_name, glob_grid, loc_grid, extra_args), ]) - elif isinstance(sched_item, EnterLoop): - tags = kernel.iname_tags(sched_item.iname) + elif isinstance(outline_item, EnterLoop): + tags = kernel.iname_tags(outline_item.iname) tags = tuple(tag for tag in tags if tag) from loopy.codegen.loop import ( @@ -124,30 +124,30 @@ def 
generate_code_for_sched_index(codegen_state, sched_index): else: raise RuntimeError("encountered (invalid) EnterLoop " "for '%s', tagged '%s'" - % (sched_item.iname, ", ".join(str(tag) for tag in tags))) + % (outline_item.iname, ", ".join(str(tag) for tag in tags))) return func(codegen_state, sched_index) - elif isinstance(sched_item, Barrier): + elif isinstance(outline_item, Barrier): # {{{ emit barrier code from loopy.codegen.result import CodeGenerationResult if codegen_state.is_generating_device_code: barrier_ast = codegen_state.ast_builder.emit_barrier( - sched_item.synchronization_kind, sched_item.mem_kind, - sched_item.comment) - if sched_item.originating_insn_id: + outline_item.synchronization_kind, outline_item.mem_kind, + outline_item.comment) + if outline_item.originating_insn_id: return CodeGenerationResult.new( codegen_state, - sched_item.originating_insn_id, + outline_item.originating_insn_id, barrier_ast, codegen_state.implemented_domain) else: return barrier_ast else: # host code - if sched_item.synchronization_kind in ["global", "local"]: + if outline_item.synchronization_kind in ["global", "local"]: # host code is assumed globally and locally synchronous return CodeGenerationResult( host_program=None, @@ -158,12 +158,12 @@ def generate_code_for_sched_index(codegen_state, sched_index): else: raise LoopyError("do not know how to emit code for barrier " "synchronization kind '%s'" "in host code" - % sched_item.synchronization_kind) + % outline_item.synchronization_kind) # }}} - elif isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] + elif isinstance(outline_item, RunInstruction): + insn = kernel.id_to_insn[outline_item.insn_id] from loopy.codegen.instruction import generate_instruction_code return codegen_state.try_vectorized( @@ -172,20 +172,20 @@ def generate_code_for_sched_index(codegen_state, sched_index): else: raise RuntimeError("unexpected outline item type: %s" - % type(sched_item)) + % type(outline_item)) 
def get_required_predicates(kernel, sched_index): result = None - for _, sched_item in generate_sub_sched_items( + for _, outline_item in generate_sub_outline_items( kernel.outline, sched_index): - if isinstance(sched_item, Barrier): + if isinstance(outline_item, Barrier): my_preds = frozenset() - elif isinstance(sched_item, RunInstruction): - my_preds = kernel.id_to_insn[sched_item.insn_id].predicates + elif isinstance(outline_item, RunInstruction): + my_preds = kernel.id_to_insn[outline_item.insn_id].predicates else: raise RuntimeError("unexpected outline item type: %s" - % type(sched_item)) + % type(outline_item)) if result is None: result = my_preds @@ -240,26 +240,26 @@ def build_loop_nest(codegen_state, outline_index): i = outline_index while i < codegen_state.outline_index_end: - sched_item = kernel.outline[i] + outline_item = kernel.outline[i] - if isinstance(sched_item, LeaveLoop): + if isinstance(outline_item, LeaveLoop): break my_sched_indices.append(i) - if isinstance(sched_item, (EnterLoop, CallKernel)): + if isinstance(outline_item, (EnterLoop, CallKernel)): _, i = gather_outline_block(kernel.outline, i) assert i <= codegen_state.outline_index_end, \ "outline block extends beyond outline_index_end" - elif isinstance(sched_item, Barrier): + elif isinstance(outline_item, Barrier): i += 1 - elif isinstance(sched_item, RunInstruction): + elif isinstance(outline_item, RunInstruction): i += 1 else: raise RuntimeError("unexpected outline item type: %s" - % type(sched_item)) + % type(outline_item)) del i diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 741ffd9e2..3a9d1f6bf 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -540,18 +540,18 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, ReturnFromKernel) - for sched_item in kernel.outline: - if isinstance(sched_item, EnterLoop): + for outline_item in kernel.outline: + if 
isinstance(outline_item, EnterLoop): lines.append("subgraph cluster_%s { label=\"%s\"" - % (sched_item.iname, sched_item.iname)) - elif isinstance(sched_item, LeaveLoop): + % (outline_item.iname, outline_item.iname)) + elif isinstance(outline_item, LeaveLoop): lines.append("}") - elif isinstance(sched_item, RunInstruction): - lines.append(sched_item.insn_id) - elif isinstance(sched_item, (CallKernel, ReturnFromKernel, Barrier)): + elif isinstance(outline_item, RunInstruction): + lines.append(outline_item.insn_id) + elif isinstance(outline_item, (CallKernel, ReturnFromKernel, Barrier)): pass else: - raise LoopyError("outline item not unterstood: %r" % sched_item) + raise LoopyError("outline item not unterstood: %r" % outline_item) return "digraph %s {\n%s\n}" % ( kernel.name, @@ -1731,9 +1731,9 @@ def get_subkernels(kernel): from loopy.outline import CallKernel - return tuple(sched_item.kernel_name - for sched_item in kernel.outline - if isinstance(sched_item, CallKernel)) + return tuple(outline_item.kernel_name + for outline_item in kernel.outline + if isinstance(outline_item, CallKernel)) @memoize_on_first_arg @@ -1747,21 +1747,21 @@ def get_subkernel_to_insn_id_map(kernel): raise LoopyError("Kernel must be outlined") from loopy.outline import ( - sched_item_to_insn_id, CallKernel, ReturnFromKernel) + outline_item_to_insn_id, CallKernel, ReturnFromKernel) subkernel = None result = {} - for sched_item in kernel.outline: - if isinstance(sched_item, CallKernel): - subkernel = sched_item.kernel_name + for outline_item in kernel.outline: + if isinstance(outline_item, CallKernel): + subkernel = outline_item.kernel_name result[subkernel] = set() - if isinstance(sched_item, ReturnFromKernel): + if isinstance(outline_item, ReturnFromKernel): subkernel = None if subkernel is not None: - for insn_id in sched_item_to_insn_id(sched_item): + for insn_id in outline_item_to_insn_id(outline_item): result[subkernel].add(insn_id) for subkernel in result: diff --git 
a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 759bbe84b..18b8322e7 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -124,22 +124,22 @@ def gather_outline_block(outline, start_idx): assert False -def generate_sub_sched_items(outline, start_idx): +def generate_sub_outline_items(outline, start_idx): if not isinstance(outline[start_idx], BeginBlockItem): yield start_idx, outline[start_idx] level = 0 i = start_idx while i < len(outline): - sched_item = outline[i] - if isinstance(sched_item, BeginBlockItem): + outline_item = outline[i] + if isinstance(outline_item, BeginBlockItem): level += 1 - elif isinstance(sched_item, EndBlockItem): + elif isinstance(outline_item, EndBlockItem): level -= 1 else: - yield i, sched_item + yield i, outline_item if level == 0: return @@ -151,57 +151,57 @@ def generate_sub_sched_items(outline, start_idx): def get_insn_ids_for_block_at(outline, start_idx): return frozenset( - sub_sched_item.insn_id - for i, sub_sched_item in generate_sub_sched_items( + sub_outline_item.insn_id + for i, sub_outline_item in generate_sub_outline_items( outline, start_idx) - if isinstance(sub_sched_item, RunInstruction)) + if isinstance(sub_outline_item, RunInstruction)) def find_active_inames_at(kernel, sched_index): active_inames = [] from loopy.outline import EnterLoop, LeaveLoop - for sched_item in kernel.outline[:sched_index]: - if isinstance(sched_item, EnterLoop): - active_inames.append(sched_item.iname) - if isinstance(sched_item, LeaveLoop): + for outline_item in kernel.outline[:sched_index]: + if isinstance(outline_item, EnterLoop): + active_inames.append(outline_item.iname) + if isinstance(outline_item, LeaveLoop): active_inames.pop() return set(active_inames) def has_barrier_within(kernel, sched_index): - sched_item = kernel.outline[sched_index] + outline_item = kernel.outline[sched_index] - if isinstance(sched_item, BeginBlockItem): + if isinstance(outline_item, BeginBlockItem): loop_contents, _ = 
gather_outline_block( kernel.outline, sched_index) from pytools import any - return any(isinstance(subsched_item, Barrier) - for subsched_item in loop_contents) - elif isinstance(sched_item, Barrier): + return any(isinstance(suboutline_item, Barrier) + for suboutline_item in loop_contents) + elif isinstance(outline_item, Barrier): return True else: return False def find_used_inames_within(kernel, sched_index): - sched_item = kernel.outline[sched_index] + outline_item = kernel.outline[sched_index] - if isinstance(sched_item, BeginBlockItem): + if isinstance(outline_item, BeginBlockItem): loop_contents, _ = gather_outline_block( kernel.outline, sched_index) - run_insns = [subsched_item - for subsched_item in loop_contents - if isinstance(subsched_item, RunInstruction)] - elif isinstance(sched_item, RunInstruction): - run_insns = [sched_item] + run_insns = [suboutline_item + for suboutline_item in loop_contents + if isinstance(suboutline_item, RunInstruction)] + elif isinstance(outline_item, RunInstruction): + run_insns = [outline_item] else: return set() result = set() - for sched_item in run_insns: - result.update(kernel.insn_inames(sched_item.insn_id)) + for outline_item in run_insns: + result.update(kernel.insn_inames(outline_item.insn_id)) return result @@ -405,15 +405,15 @@ def get_priority_tiers(wanted, priorities): yield tier -def sched_item_to_insn_id(sched_item): +def outline_item_to_insn_id(outline_item): # Helper for use in generator expressions, i.e. - # (... for insn_id in sched_item_to_insn_id(item) ...) - if isinstance(sched_item, RunInstruction): - yield sched_item.insn_id - elif isinstance(sched_item, Barrier): - if (hasattr(sched_item, "originating_insn_id") - and sched_item.originating_insn_id is not None): - yield sched_item.originating_insn_id + # (... for insn_id in outline_item_to_insn_id(item) ...) 
+ if isinstance(outline_item, RunInstruction): + yield outline_item.insn_id + elif isinstance(outline_item, Barrier): + if (hasattr(outline_item, "originating_insn_id") + and outline_item.originating_insn_id is not None): + yield outline_item.originating_insn_id # }}} @@ -461,33 +461,33 @@ def dump_outline(kernel, outline): indent = "" from loopy.kernel.data import MultiAssignmentBase - for sched_item in outline: - if isinstance(sched_item, EnterLoop): - lines.append(indent + "for %s" % sched_item.iname) + for outline_item in outline: + if isinstance(outline_item, EnterLoop): + lines.append(indent + "for %s" % outline_item.iname) indent += " " - elif isinstance(sched_item, LeaveLoop): + elif isinstance(outline_item, LeaveLoop): indent = indent[:-4] - lines.append(indent + "end %s" % sched_item.iname) - elif isinstance(sched_item, CallKernel): + lines.append(indent + "end %s" % outline_item.iname) + elif isinstance(outline_item, CallKernel): lines.append(indent + "CALL KERNEL %s(extra_args=%s, extra_inames=%s)" % ( - sched_item.kernel_name, - sched_item.extra_args, - sched_item.extra_inames)) + outline_item.kernel_name, + outline_item.extra_args, + outline_item.extra_inames)) indent += " " - elif isinstance(sched_item, ReturnFromKernel): + elif isinstance(outline_item, ReturnFromKernel): indent = indent[:-4] - lines.append(indent + "RETURN FROM KERNEL %s" % sched_item.kernel_name) - elif isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] + lines.append(indent + "RETURN FROM KERNEL %s" % outline_item.kernel_name) + elif isinstance(outline_item, RunInstruction): + insn = kernel.id_to_insn[outline_item.insn_id] if isinstance(insn, MultiAssignmentBase): - insn_str = format_insn(kernel, sched_item.insn_id) + insn_str = format_insn(kernel, outline_item.insn_id) else: - insn_str = sched_item.insn_id + insn_str = outline_item.insn_id lines.append(indent + insn_str) - elif isinstance(sched_item, Barrier): + elif isinstance(outline_item, 
Barrier): lines.append(indent + "... %sbarrier" % - sched_item.synchronization_kind[0]) + outline_item.synchronization_kind[0]) else: assert False @@ -788,7 +788,7 @@ def generate_loop_outlines_internal( insn_ids_to_try.extend( insn_id for item in sched_state.preoutline - for insn_id in sched_item_to_insn_id(item)) + for insn_id in outline_item_to_insn_id(item)) for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] @@ -1030,16 +1030,16 @@ def generate_loop_outlines_internal( seen_an_insn = False ignore_count = 0 - for sched_item in sched_state.outline[::-1]: - if isinstance(sched_item, RunInstruction): + for outline_item in sched_state.outline[::-1]: + if isinstance(outline_item, RunInstruction): seen_an_insn = True - elif isinstance(sched_item, LeaveLoop): + elif isinstance(outline_item, LeaveLoop): ignore_count += 1 - elif isinstance(sched_item, EnterLoop): + elif isinstance(outline_item, EnterLoop): if ignore_count: ignore_count -= 1 else: - assert sched_item.iname == last_entered_loop + assert outline_item.iname == last_entered_loop if seen_an_insn: can_leave = True break @@ -1331,9 +1331,9 @@ def convert_barrier_instructions_to_barriers(kernel, outline): from loopy.kernel.instruction import BarrierInstruction result = [] - for sched_item in outline: - if isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] + for outline_item in outline: + if isinstance(outline_item, RunInstruction): + insn = kernel.id_to_insn[outline_item.insn_id] if isinstance(insn, BarrierInstruction): result.append(Barrier( synchronization_kind=insn.synchronization_kind, @@ -1342,7 +1342,7 @@ def convert_barrier_instructions_to_barriers(kernel, outline): comment="Barrier inserted due to %s" % insn.id)) continue - result.append(sched_item) + result.append(outline_item) return result @@ -1609,10 +1609,10 @@ def _insn_ids_reaching_end(outline, kind, reverse): insn_ids_alive_at_scope = [set()] - for sched_item in outline: - if isinstance(sched_item, 
enter_scope_item_kind): + for outline_item in outline: + if isinstance(outline_item, enter_scope_item_kind): insn_ids_alive_at_scope.append(set()) - elif isinstance(sched_item, leave_scope_item_kind): + elif isinstance(outline_item, leave_scope_item_kind): innermost_scope = insn_ids_alive_at_scope.pop() # Instructions in deeper scopes are alive but could be killed by # barriers at a shallower level, e.g.: @@ -1624,7 +1624,7 @@ def _insn_ids_reaching_end(outline, kind, reverse): # # Hence we merge this scope into the parent scope. insn_ids_alive_at_scope[-1].update(innermost_scope) - elif isinstance(sched_item, Barrier): + elif isinstance(outline_item, Barrier): # This barrier kills only the instruction ids that are alive at # the current scope (or deeper). Without further analysis, we # can't assume that instructions at shallower scope can be @@ -1639,11 +1639,11 @@ def _insn_ids_reaching_end(outline, kind, reverse): # barrier() # end if barrier_kind_more_or_equally_global( - sched_item.synchronization_kind, kind): + outline_item.synchronization_kind, kind): insn_ids_alive_at_scope[-1].clear() else: insn_ids_alive_at_scope[-1] |= set( - insn_id for insn_id in sched_item_to_insn_id(sched_item)) + insn_id for insn_id in outline_item_to_insn_id(outline_item)) assert len(insn_ids_alive_at_scope) == 1 return insn_ids_alive_at_scope[-1] @@ -1704,9 +1704,9 @@ def insert_barriers( i = 0 while i < len(outline): - sched_item = outline[i] + outline_item = outline[i] - if isinstance(sched_item, EnterLoop): + if isinstance(outline_item, EnterLoop): subloop, new_i = gather_outline_block(outline, i) loop_head = ( @@ -1752,30 +1752,30 @@ def insert_barriers( i = new_i - elif isinstance(sched_item, Barrier): - result.append(sched_item) + elif isinstance(outline_item, Barrier): + result.append(outline_item) if barrier_kind_more_or_equally_global( - sched_item.synchronization_kind, synchronization_kind): + outline_item.synchronization_kind, synchronization_kind): 
dep_tracker.discard_all_sources() i += 1 - elif isinstance(sched_item, RunInstruction): + elif isinstance(outline_item, RunInstruction): for dep in dep_tracker.gen_dependencies_with_target_at( - sched_item.insn_id): + outline_item.insn_id): append_barrier_or_raise_error(result, dep, verify_only) dep_tracker.discard_all_sources() break - result.append(sched_item) - dep_tracker.add_source(sched_item.insn_id) + result.append(outline_item) + dep_tracker.add_source(outline_item.insn_id) i += 1 - elif isinstance(sched_item, (CallKernel, ReturnFromKernel)): - result.append(sched_item) + elif isinstance(outline_item, (CallKernel, ReturnFromKernel)): + result.append(outline_item) i += 1 else: raise ValueError("unexpected outline item type '%s'" - % type(sched_item).__name__) + % type(outline_item).__name__) return result @@ -1786,9 +1786,9 @@ def insert_barriers( result = [] i = 0 while i < len(outline): - sched_item = outline[i] + outline_item = outline[i] - if isinstance(sched_item, EnterLoop): + if isinstance(outline_item, EnterLoop): subloop, new_i = gather_outline_block(outline, i) new_subloop = insert_barriers( kernel, subloop[1:-1], synchronization_kind, verify_only, @@ -1798,14 +1798,14 @@ def insert_barriers( result.append(subloop[-1]) i = new_i - elif isinstance(sched_item, + elif isinstance(outline_item, (Barrier, RunInstruction, CallKernel, ReturnFromKernel)): - result.append(sched_item) + result.append(outline_item) i += 1 else: raise ValueError("unexpected outline item type '%s'" - % type(sched_item).__name__) + % type(outline_item).__name__) # }}} @@ -1869,7 +1869,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): preoutlined_insn_ids = set( insn_id for item in preoutline - for insn_id in sched_item_to_insn_id(item)) + for insn_id in outline_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, filter_iname_tags_by_type) diff --git a/loopy/outline/device_mapping.py b/loopy/outline/device_mapping.py index 
cd8288f5e..2f58a2b14 100644 --- a/loopy/outline/device_mapping.py +++ b/loopy/outline/device_mapping.py @@ -70,13 +70,13 @@ def map_outline_onto_host_or_device_impl(kernel, device_prog_name_gen): i = start_idx current_chunk = [] while i <= end_idx: - sched_item = outline[i] + outline_item = outline[i] - if isinstance(sched_item, RunInstruction): - current_chunk.append(sched_item) + if isinstance(outline_item, RunInstruction): + current_chunk.append(outline_item) i += 1 - elif isinstance(sched_item, EnterLoop): + elif isinstance(outline_item, EnterLoop): loop_end = loop_bounds[i] inner_outline = [] loop_required_splitting = inner_mapper( @@ -105,8 +105,8 @@ def map_outline_onto_host_or_device_impl(kernel, device_prog_name_gen): inner_outline + [end_item]) - elif isinstance(sched_item, Barrier): - if sched_item.synchronization_kind == "global": + elif isinstance(outline_item, Barrier): + if outline_item.synchronization_kind == "global": # Wrap the current chunk into a kernel call. outline_required_splitting = True if current_chunk: @@ -114,14 +114,14 @@ def map_outline_onto_host_or_device_impl(kernel, device_prog_name_gen): [dummy_call.copy()] + current_chunk + [dummy_return.copy()]) - new_outline.append(sched_item) + new_outline.append(outline_item) current_chunk = [] else: - current_chunk.append(sched_item) + current_chunk.append(outline_item) i += 1 else: raise LoopyError("unexpected type of outline item: %s" - % type(sched_item).__name__) + % type(outline_item).__name__) if current_chunk and outline_required_splitting: # Wrap remainder of outline into a kernel call. 
@@ -148,18 +148,18 @@ def map_outline_onto_host_or_device_impl(kernel, device_prog_name_gen): # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions inames = [] - for idx, sched_item in enumerate(new_outline): - if isinstance(sched_item, CallKernel): + for idx, outline_item in enumerate(new_outline): + if isinstance(outline_item, CallKernel): last_kernel_name = device_prog_name_gen() - new_outline[idx] = sched_item.copy( + new_outline[idx] = outline_item.copy( kernel_name=last_kernel_name, extra_inames=list(inames)) - elif isinstance(sched_item, ReturnFromKernel): - new_outline[idx] = sched_item.copy( + elif isinstance(outline_item, ReturnFromKernel): + new_outline[idx] = outline_item.copy( kernel_name=last_kernel_name) - elif isinstance(sched_item, EnterLoop): - inames.append(sched_item.iname) - elif isinstance(sched_item, LeaveLoop): + elif isinstance(outline_item, EnterLoop): + inames.append(outline_item.iname) + elif isinstance(outline_item, LeaveLoop): inames.pop() new_kernel = kernel.copy(outline=new_outline) diff --git a/loopy/outline/tools.py b/loopy/outline/tools.py index e0e69a9b3..8946f6ebb 100644 --- a/loopy/outline/tools.py +++ b/loopy/outline/tools.py @@ -36,10 +36,10 @@ def get_block_boundaries(outline): from loopy.outline import (BeginBlockItem, EndBlockItem) block_bounds = {} active_blocks = [] - for idx, sched_item in enumerate(outline): - if isinstance(sched_item, BeginBlockItem): + for idx, outline_item in enumerate(outline): + if isinstance(outline_item, BeginBlockItem): active_blocks.append(idx) - elif isinstance(sched_item, EndBlockItem): + elif isinstance(outline_item, EndBlockItem): start = active_blocks.pop() block_bounds[start] = idx block_bounds[idx] = start @@ -80,9 +80,9 @@ def add_extra_args_to_outline(kernel): new_outline = [] from loopy.outline import CallKernel - for sched_item in kernel.outline: - if isinstance(sched_item, CallKernel): - subkernel = sched_item.kernel_name + for outline_item in kernel.outline: + 
if isinstance(outline_item, CallKernel): + subkernel = outline_item.kernel_name used_temporaries = ( temporaries_read_in_subkernel(kernel, subkernel) @@ -96,12 +96,12 @@ def add_extra_args_to_outline(kernel): and kernel.temporary_variables[tv].initializer is None and - tv not in sched_item.extra_args) + tv not in outline_item.extra_args) - new_outline.append(sched_item.copy( - extra_args=sched_item.extra_args + sorted(more_args))) + new_outline.append(outline_item.copy( + extra_args=outline_item.extra_args + sorted(more_args))) else: - new_outline.append(sched_item) + new_outline.append(outline_item) return kernel.copy(outline=new_outline) diff --git a/loopy/statistics.py b/loopy/statistics.py index ac3d35589..0d646a27f 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1740,29 +1740,29 @@ def get_synchronization_map(knl, subgroup_size=None): else: return one - for sched_item in knl.outline: - if isinstance(sched_item, EnterLoop): - if sched_item.iname: # (if not empty) - iname_list.append(sched_item.iname) - elif isinstance(sched_item, LeaveLoop): - if sched_item.iname: # (if not empty) + for outline_item in knl.outline: + if isinstance(outline_item, EnterLoop): + if outline_item.iname: # (if not empty) + iname_list.append(outline_item.iname) + elif isinstance(outline_item, LeaveLoop): + if outline_item.iname: # (if not empty) iname_list.pop() - elif isinstance(sched_item, Barrier): + elif isinstance(outline_item, Barrier): result = result + ToCountMap({"barrier_%s" % - sched_item.synchronization_kind: + outline_item.synchronization_kind: get_count_poly(iname_list)}) - elif isinstance(sched_item, CallKernel): + elif isinstance(outline_item, CallKernel): result = result + ToCountMap( {"kernel_launch": get_count_poly(iname_list)}) - elif isinstance(sched_item, (ReturnFromKernel, RunInstruction)): + elif isinstance(outline_item, (ReturnFromKernel, RunInstruction)): pass else: raise LoopyError("unexpected outline item: %s" - % type(sched_item).__name__) + 
% type(outline_item).__name__) return result diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 4b677fa8b..3c1b06d00 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -108,10 +108,10 @@ class LivenessAnalysis(object): gen = dict((idx, set()) for idx in range(len(self.outline))) kill = dict((idx, set()) for idx in range(len(self.outline))) - for sched_idx, sched_item in enumerate(self.outline): - if not isinstance(sched_item, RunInstruction): + for sched_idx, outline_item in enumerate(self.outline): + if not isinstance(outline_item, RunInstruction): continue - insn = self.kernel.id_to_insn[sched_item.insn_id] + insn = self.kernel.id_to_insn[outline_item.insn_id] for var in insn.assignee_var_names(): if var not in self.kernel.temporary_variables: continue @@ -160,13 +160,13 @@ class LivenessAnalysis(object): def print_liveness(self): print(75 * "-") print("LIVE IN:") - for sched_idx, sched_item in enumerate(self.outline): + for sched_idx, outline_item in enumerate(self.outline): print("{item}: {{{vars}}}".format( item=sched_idx, vars=", ".join(sorted(self[sched_idx].live_in)))) print(75 * "-") print("LIVE OUT:") - for sched_idx, sched_item in enumerate(self.outline): + for sched_idx, outline_item in enumerate(self.outline): print("{item}: {{{vars}}}".format( item=sched_idx, vars=", ".join(sorted(self[sched_idx].live_out)))) @@ -316,11 +316,11 @@ class TemporarySaver(object): def subkernel_to_slice_indices(self): result = {} - for sched_item_idx, sched_item in enumerate(self.kernel.outline): - if isinstance(sched_item, CallKernel): - start_idx = sched_item_idx - elif isinstance(sched_item, ReturnFromKernel): - result[sched_item.kernel_name] = (start_idx, 1 + sched_item_idx) + for outline_item_idx, outline_item in enumerate(self.kernel.outline): + if isinstance(outline_item, CallKernel): + start_idx = outline_item_idx + elif isinstance(outline_item, ReturnFromKernel): + result[outline_item.kernel_name] = (start_idx, 1 + 
outline_item_idx) return result @@ -331,17 +331,17 @@ class TemporarySaver(object): within_subkernel = False result = {} - for sched_item_idx, sched_item in enumerate(self.kernel.outline): - if isinstance(sched_item, CallKernel): + for outline_item_idx, outline_item in enumerate(self.kernel.outline): + if isinstance(outline_item, CallKernel): within_subkernel = True - result[sched_item.kernel_name] = frozenset(current_outer_inames) - elif isinstance(sched_item, ReturnFromKernel): + result[outline_item.kernel_name] = frozenset(current_outer_inames) + elif isinstance(outline_item, ReturnFromKernel): within_subkernel = False - elif isinstance(sched_item, EnterLoop): + elif isinstance(outline_item, EnterLoop): if not within_subkernel: - current_outer_inames.add(sched_item.iname) - elif isinstance(sched_item, LeaveLoop): - current_outer_inames.discard(sched_item.iname) + current_outer_inames.add(outline_item.iname) + elif isinstance(outline_item, LeaveLoop): + current_outer_inames.discard(outline_item.iname) return result @@ -751,38 +751,38 @@ def save_and_reload_temporaries(knl): from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - for sched_idx, sched_item in enumerate(knl.outline): + for sched_idx, outline_item in enumerate(knl.outline): - if isinstance(sched_item, CallKernel): + if isinstance(outline_item, CallKernel): # Any written temporary that is live-out needs to be read into # memory because of the potential for partial writes. 
if sched_idx == 0: # Kernel entry: nothing live interesting_temporaries = set() else: - subkernel = sched_item.kernel_name + subkernel = outline_item.kernel_name interesting_temporaries = ( temporaries_read_in_subkernel(knl, subkernel) | temporaries_written_in_subkernel(knl, subkernel)) for temporary in liveness[sched_idx].live_out & interesting_temporaries: logger.info("reloading {0} at entry of {1}" - .format(temporary, sched_item.kernel_name)) - saver.reload(temporary, sched_item.kernel_name) + .format(temporary, outline_item.kernel_name)) + saver.reload(temporary, outline_item.kernel_name) - elif isinstance(sched_item, ReturnFromKernel): + elif isinstance(outline_item, ReturnFromKernel): if sched_idx == len(knl.outline) - 1: # Kernel exit: nothing live interesting_temporaries = set() else: - subkernel = sched_item.kernel_name + subkernel = outline_item.kernel_name interesting_temporaries = ( temporaries_written_in_subkernel(knl, subkernel)) for temporary in liveness[sched_idx].live_in & interesting_temporaries: logger.info("saving {0} before return of {1}" - .format(temporary, sched_item.kernel_name)) - saver.save(temporary, sched_item.kernel_name) + .format(temporary, outline_item.kernel_name)) + saver.save(temporary, outline_item.kernel_name) return saver.finish() diff --git a/test/test_loopy.py b/test/test_loopy.py index 04b018125..7742dd807 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2214,24 +2214,24 @@ def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): seen_barrier = False loop_level = 0 - for sched_item in knl.outline: - if isinstance(sched_item, RunInstruction): - if sched_item.insn_id == id1: + for outline_item in knl.outline: + if isinstance(outline_item, RunInstruction): + if outline_item.insn_id == id1: watch_for_barrier = True - elif sched_item.insn_id == id2: + elif outline_item.insn_id == id2: return watch_for_barrier and seen_barrier - elif isinstance(sched_item, Barrier): + elif isinstance(outline_item, Barrier): 
if watch_for_barrier and loop_level not in ignore_barriers_in_levels: seen_barrier = True - elif isinstance(sched_item, EnterLoop): + elif isinstance(outline_item, EnterLoop): loop_level += 1 - elif isinstance(sched_item, LeaveLoop): + elif isinstance(outline_item, LeaveLoop): loop_level -= 1 - elif isinstance(sched_item, (CallKernel, ReturnFromKernel)): + elif isinstance(outline_item, (CallKernel, ReturnFromKernel)): pass else: raise RuntimeError("outline item type '%s' not understood" - % type(sched_item).__name__) + % type(outline_item).__name__) raise RuntimeError("id2 was not seen") -- GitLab From f3da73d9723c80baa226cb2ee603df0c302a52bd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:45:16 -0600 Subject: [PATCH 46/56] renamed sched_state->outline_state --- loopy/outline/__init__.py | 210 +++++++++++++++++++------------------- 1 file changed, 105 insertions(+), 105 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 18b8322e7..b784c98fa 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -652,10 +652,10 @@ class OutlinerState(ImmutableRecord): def generate_loop_outlines_internal( - sched_state, allow_boost=False, debug=None): + outline_state, allow_boost=False, debug=None): # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. 
- kernel = sched_state.kernel + kernel = outline_state.kernel Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa @@ -664,11 +664,11 @@ def generate_loop_outlines_internal( else: rec_allow_boost = False - active_inames_set = frozenset(sched_state.active_inames) + active_inames_set = frozenset(outline_state.active_inames) next_preoutline_item = ( - sched_state.preoutline[0] - if len(sched_state.preoutline) > 0 + outline_state.preoutline[0] + if len(outline_state.preoutline) > 0 else None) # {{{ decide about debug mode @@ -677,7 +677,7 @@ def generate_loop_outlines_internal( if debug is not None: if (debug.debug_length is not None - and len(sched_state.outline) >= debug.debug_length): + and len(outline_state.outline) >= debug.debug_length): debug_mode = True if debug_mode: @@ -688,15 +688,15 @@ def generate_loop_outlines_internal( print(kernel.stringify(with_dependencies=True)) print(75*"=") print("CURRENT OUTLINE:") - print(dump_outline(sched_state.kernel, sched_state.outline)) - if sched_state.preoutline: + print(dump_outline(outline_state.kernel, outline_state.outline)) + if outline_state.preoutline: print(75*"=") print("PREOUTLINED ITEMS AWAITING OUTLINING:") - print(dump_outline(sched_state.kernel, sched_state.preoutline)) + print(dump_outline(outline_state.kernel, outline_state.preoutline)) #print("boost allowed:", allow_boost) print(75*"=") print("LOOP NEST MAP (inner: outer):") - for iname, val in six.iteritems(sched_state.loop_nest_around_map): + for iname, val in six.iteritems(outline_state.loop_nest_around_map): print("%s : %s" % (iname, ", ".join(val))) print(75*"=") @@ -711,28 +711,28 @@ def generate_loop_outlines_internal( # {{{ see if we have reached the start/end of kernel in the preoutline if isinstance(next_preoutline_item, CallKernel): - assert sched_state.within_subkernel is False + assert outline_state.within_subkernel is False for result in generate_loop_outlines_internal( - sched_state.copy( - outline=sched_state.outline + ( 
+ outline_state.copy( + outline=outline_state.outline + ( next_preoutline_item,), - preoutline=sched_state.preoutline[1:], + preoutline=outline_state.preoutline[1:], within_subkernel=True, may_outline_global_barriers=False, - enclosing_subkernel_inames=sched_state.active_inames), + enclosing_subkernel_inames=outline_state.active_inames), allow_boost=rec_allow_boost, debug=debug): yield result if isinstance(next_preoutline_item, ReturnFromKernel): - assert sched_state.within_subkernel is True + assert outline_state.within_subkernel is True # Make sure all subkernel inames have finished. - if sched_state.active_inames == sched_state.enclosing_subkernel_inames: + if outline_state.active_inames == outline_state.enclosing_subkernel_inames: for result in generate_loop_outlines_internal( - sched_state.copy( - outline=sched_state.outline + ( + outline_state.copy( + outline=outline_state.outline + ( next_preoutline_item,), - preoutline=sched_state.preoutline[1:], + preoutline=outline_state.preoutline[1:], within_subkernel=False, may_outline_global_barriers=True), allow_boost=rec_allow_boost, @@ -751,10 +751,10 @@ def generate_loop_outlines_internal( isinstance(next_preoutline_item, Barrier) and next_preoutline_item.originating_insn_id is None): for result in generate_loop_outlines_internal( - sched_state.copy( - outline=sched_state.outline + ( + outline_state.copy( + outline=outline_state.outline + ( next_preoutline_item,), - preoutline=sched_state.preoutline[1:]), + preoutline=outline_state.preoutline[1:]), allow_boost=rec_allow_boost, debug=debug): yield result @@ -767,7 +767,7 @@ def generate_loop_outlines_internal( # the current loop nest, in this set: reachable_insn_ids = set() - active_groups = frozenset(sched_state.active_group_counts) + active_groups = frozenset(outline_state.active_group_counts) def insn_sort_key(insn_id): insn = kernel.id_to_insn[insn_id] @@ -777,23 +777,23 @@ def generate_loop_outlines_internal( return (insn.priority, len(active_groups & 
insn.groups), insn.id) # Use previous instruction sorting result if it is available - if sched_state.insn_ids_to_try is None: + if outline_state.insn_ids_to_try is None: insn_ids_to_try = sorted( - # Non-preoutlined instructions go first. - sched_state.unoutlined_insn_ids - sched_state.preoutlined_insn_ids, - key=insn_sort_key, reverse=True) + # Non-preoutlined instructions go first. + outline_state.unoutlined_insn_ids - outline_state.preoutlined_insn_ids, + key=insn_sort_key, reverse=True) else: - insn_ids_to_try = sched_state.insn_ids_to_try + insn_ids_to_try = outline_state.insn_ids_to_try insn_ids_to_try.extend( insn_id - for item in sched_state.preoutline + for item in outline_state.preoutline for insn_id in outline_item_to_insn_id(item)) for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.outlined_insn_ids + is_ready = insn.depends_on <= outline_state.outlined_insn_ids if not is_ready: if debug_mode: @@ -802,12 +802,12 @@ def generate_loop_outlines_internal( # print("instruction '%s' is missing insn depedencies '%s'" % ( # format_insn(kernel, insn.id), ",".join( - # insn.depends_on - sched_state.outlined_insn_ids))) + # insn.depends_on - outline_state.outlined_insn_ids))) pass continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames - have = active_inames_set - sched_state.parallel_inames + want = kernel.insn_inames(insn) - outline_state.parallel_inames + have = active_inames_set - outline_state.parallel_inames # If insn is boostable, it may be placed inside a more deeply # nested loop without harm. 
@@ -831,7 +831,7 @@ def generate_loop_outlines_internal( # {{{ check if outlining this insn is compatible with preoutline - if insn_id in sched_state.preoutlined_insn_ids: + if insn_id in outline_state.preoutlined_insn_ids: if isinstance(next_preoutline_item, RunInstruction): next_preoutline_insn_id = next_preoutline_item.insn_id elif isinstance(next_preoutline_item, Barrier): @@ -853,13 +853,13 @@ def generate_loop_outlines_internal( from loopy.kernel.instruction import BarrierInstruction if isinstance(insn, BarrierInstruction) and \ insn.synchronization_kind == "global": - if not sched_state.may_outline_global_barriers: + if not outline_state.may_outline_global_barriers: if debug_mode: print("can't outline '%s' because global barriers are " "not currently allowed" % format_insn(kernel, insn.id)) is_ready = False else: - if not sched_state.within_subkernel: + if not outline_state.within_subkernel: if debug_mode: print("can't outline '%s' because not within subkernel" % format_insn(kernel, insn.id)) @@ -895,7 +895,7 @@ def generate_loop_outlines_internal( # {{{ update active group counts for added instruction if insn.groups: - new_active_group_counts = sched_state.active_group_counts.copy() + new_active_group_counts = outline_state.active_group_counts.copy() for grp in insn.groups: if grp in new_active_group_counts: @@ -905,9 +905,9 @@ def generate_loop_outlines_internal( else: new_active_group_counts[grp] = ( - sched_state.group_insn_counts[grp] - 1) + outline_state.group_insn_counts[grp] - 1) else: - new_active_group_counts = sched_state.active_group_counts + new_active_group_counts = outline_state.active_group_counts # }}} @@ -918,7 +918,7 @@ def generate_loop_outlines_internal( # invalidate instruction_ids_to_try when active group changes if set(new_active_group_counts.keys()) != set( - sched_state.active_group_counts.keys()): + outline_state.active_group_counts.keys()): new_insn_ids_to_try = None # }}} @@ -929,20 +929,20 @@ def generate_loop_outlines_internal( 
new_uses_of_boostability.append( (insn.id, orig_have & insn.boostable_into)) - new_sched_state = sched_state.copy( - outlined_insn_ids=sched_state.outlined_insn_ids | iid_set, - unoutlined_insn_ids=sched_state.unoutlined_insn_ids - iid_set, + new_outline_state = outline_state.copy( + outlined_insn_ids=outline_state.outlined_insn_ids | iid_set, + unoutlined_insn_ids=outline_state.unoutlined_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, outline=( - sched_state.outline + ( + outline_state.outline + ( RunInstruction(insn_id=insn.id),)), preoutline=( - sched_state.preoutline - if insn_id not in sched_state.preoutlined_insn_ids - else sched_state.preoutline[1:]), + outline_state.preoutline + if insn_id not in outline_state.preoutlined_insn_ids + else outline_state.preoutline[1:]), active_group_counts=new_active_group_counts, uses_of_boostability=( - sched_state.uses_of_boostability + outline_state.uses_of_boostability + new_uses_of_boostability) ) @@ -950,11 +950,11 @@ def generate_loop_outlines_internal( # made, revert to top of outliner and see if more progress can be # made. for sub_sched in generate_loop_outlines_internal( - new_sched_state, + new_outline_state, allow_boost=rec_allow_boost, debug=debug): yield sub_sched - if not sched_state.group_insn_counts: + if not outline_state.group_insn_counts: # No groups: We won't need to backtrack on outlining # instructions. 
return @@ -963,13 +963,13 @@ def generate_loop_outlines_internal( # {{{ see if we're ready to leave the innermost loop - last_entered_loop = sched_state.last_entered_loop + last_entered_loop = outline_state.last_entered_loop if last_entered_loop is not None: can_leave = True if ( - last_entered_loop in sched_state.preoutlined_inames + last_entered_loop in outline_state.preoutlined_inames and not ( isinstance(next_preoutline_item, LeaveLoop) and next_preoutline_item.iname == last_entered_loop)): @@ -978,11 +978,11 @@ def generate_loop_outlines_internal( print("cannot leave '%s' because of preoutline constraints" % last_entered_loop) can_leave = False - elif last_entered_loop not in sched_state.breakable_inames: + elif last_entered_loop not in outline_state.breakable_inames: # If the iname is not breakable, then check that we've # outlined all the instructions that require it. - for insn_id in sched_state.unoutlined_insn_ids: + for insn_id in outline_state.unoutlined_insn_ids: insn = kernel.id_to_insn[insn_id] if last_entered_loop in kernel.insn_inames(insn): if debug_mode: @@ -992,10 +992,10 @@ def generate_loop_outlines_internal( # check if there's a dependency of insn that needs to be # outside of last_entered_loop. 
for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.outlined_insn_ids): + outline_state.outlined_insn_ids): subdep = kernel.id_to_insn[insn_id] want = (kernel.insn_inames(subdep_id) - - sched_state.parallel_inames) + - outline_state.parallel_inames) if ( last_entered_loop not in want and last_entered_loop not in subdep.boostable_into): @@ -1030,7 +1030,7 @@ def generate_loop_outlines_internal( seen_an_insn = False ignore_count = 0 - for outline_item in sched_state.outline[::-1]: + for outline_item in outline_state.outline[::-1]: if isinstance(outline_item, RunInstruction): seen_an_insn = True elif isinstance(outline_item, LeaveLoop): @@ -1047,16 +1047,16 @@ def generate_loop_outlines_internal( if can_leave and not debug_mode: for sub_sched in generate_loop_outlines_internal( - sched_state.copy( + outline_state.copy( outline=( - sched_state.outline + outline_state.outline + (LeaveLoop(iname=last_entered_loop),)), - active_inames=sched_state.active_inames[:-1], + active_inames=outline_state.active_inames[:-1], preoutline=( - sched_state.preoutline + outline_state.preoutline if last_entered_loop - not in sched_state.preoutlined_inames - else sched_state.preoutline[1:]), + not in outline_state.preoutlined_inames + else outline_state.preoutline[1:]), ), allow_boost=rec_allow_boost, debug=debug): yield sub_sched @@ -1069,12 +1069,12 @@ def generate_loop_outlines_internal( # Find inames that are being referenced by as yet unoutlined instructions. needed_inames = set() - for insn_id in sched_state.unoutlined_insn_ids: + for insn_id in outline_state.unoutlined_insn_ids: needed_inames.update(kernel.insn_inames(insn_id)) needed_inames = (needed_inames # There's no notion of 'entering' a parallel loop - - sched_state.parallel_inames + - outline_state.parallel_inames # Don't reenter a loop we're already in. 
- active_inames_set) @@ -1082,12 +1082,12 @@ def generate_loop_outlines_internal( if debug_mode: print(75*"-") print("inames still needed :", ",".join(needed_inames)) - print("active inames :", ",".join(sched_state.active_inames)) - print("inames entered so far :", ",".join(sched_state.entered_inames)) + print("active inames :", ",".join(outline_state.active_inames)) + print("inames entered so far :", ",".join(outline_state.entered_inames)) print("reachable insns:", ",".join(reachable_insn_ids)) print("active groups (with insn counts):", ",".join( "%s: %d" % (grp, c) - for grp, c in six.iteritems(sched_state.active_group_counts))) + for grp, c in six.iteritems(outline_state.active_group_counts))) print(75*"-") if needed_inames: @@ -1098,7 +1098,7 @@ def generate_loop_outlines_internal( # {{{ check if outlining this iname now is allowed/plausible if ( - iname in sched_state.preoutlined_inames + iname in outline_state.preoutlined_inames and not ( isinstance(next_preoutline_item, EnterLoop) and next_preoutline_item.iname == iname)): @@ -1108,17 +1108,17 @@ def generate_loop_outlines_internal( continue currently_accessible_inames = ( - active_inames_set | sched_state.parallel_inames) + active_inames_set | outline_state.parallel_inames) if ( - not sched_state.loop_nest_around_map[iname] + not outline_state.loop_nest_around_map[iname] <= currently_accessible_inames): if debug_mode: print("outlining %s prohibited by loop nest-around map" % iname) continue if ( - not sched_state.loop_insn_dep_map.get(iname, set()) - <= sched_state.outlined_insn_ids): + not outline_state.loop_insn_dep_map.get(iname, set()) + <= outline_state.outlined_insn_ids): if debug_mode: print( "outlining {iname} prohibited by loop dependency map " @@ -1126,9 +1126,9 @@ def generate_loop_outlines_internal( .format( iname=iname, needed_insns=", ".join( - sched_state.loop_insn_dep_map.get(iname, set()) + outline_state.loop_insn_dep_map.get(iname, set()) - - sched_state.outlined_insn_ids))) + 
outline_state.outlined_insn_ids))) continue @@ -1152,7 +1152,7 @@ def generate_loop_outlines_internal( & set(kernel.temporary_variables)): writer_insn, = kernel.writer_map()[domain_par] - if writer_insn not in sched_state.outlined_insn_ids: + if writer_insn not in outline_state.outlined_insn_ids: data_dep_written = False if debug_mode: print("iname '%s' not outlined because domain " @@ -1197,19 +1197,19 @@ def generate_loop_outlines_internal( # loops in the second are not even tried (and so on). loop_priority_set = set().union(*[set(prio) for prio in - sched_state.kernel.loop_priority]) + outline_state.kernel.loop_priority]) useful_loops_set = set(six.iterkeys(iname_to_usefulness)) useful_and_desired = useful_loops_set & loop_priority_set if useful_and_desired: wanted = ( useful_and_desired - - sched_state.ilp_inames - - sched_state.vec_inames + - outline_state.ilp_inames + - outline_state.vec_inames ) priority_tiers = [t for t in get_priority_tiers(wanted, - sched_state.kernel.loop_priority + outline_state.kernel.loop_priority ) ] @@ -1220,26 +1220,26 @@ def generate_loop_outlines_internal( priority_tiers.append( useful_loops_set - loop_priority_set - - sched_state.ilp_inames - - sched_state.vec_inames + - outline_state.ilp_inames + - outline_state.vec_inames ) else: priority_tiers = [ useful_loops_set - - sched_state.ilp_inames - - sched_state.vec_inames + - outline_state.ilp_inames + - outline_state.vec_inames ] # vectorization must be the absolute innermost loop priority_tiers.extend([ [iname] - for iname in sched_state.ilp_inames + for iname in outline_state.ilp_inames if iname in useful_loops_set ]) priority_tiers.extend([ [iname] - for iname in sched_state.vec_inames + for iname in outline_state.vec_inames if iname in useful_loops_set ]) @@ -1260,19 +1260,19 @@ def generate_loop_outlines_internal( reverse=True): for sub_sched in generate_loop_outlines_internal( - sched_state.copy( + outline_state.copy( outline=( - sched_state.outline + outline_state.outline 
+ (EnterLoop(iname=iname),)), active_inames=( - sched_state.active_inames + (iname,)), + outline_state.active_inames + (iname,)), entered_inames=( - sched_state.entered_inames + outline_state.entered_inames | frozenset((iname,))), preoutline=( - sched_state.preoutline - if iname not in sched_state.preoutlined_inames - else sched_state.preoutline[1:]), + outline_state.preoutline + if iname not in outline_state.preoutlined_inames + else outline_state.preoutline[1:]), ), allow_boost=rec_allow_boost, debug=debug): @@ -1293,13 +1293,13 @@ def generate_loop_outlines_internal( raise OutlineDebugInput(inp) if ( - not sched_state.active_inames - and not sched_state.unoutlined_insn_ids - and not sched_state.preoutline): + not outline_state.active_inames + and not outline_state.unoutlined_insn_ids + and not outline_state.preoutline): # if done, yield result - debug.log_success(sched_state.outline) + debug.log_success(outline_state.outline) - for boost_insn_id, boost_inames in sched_state.uses_of_boostability: + for boost_insn_id, boost_inames in outline_state.uses_of_boostability: warn_with_kernel( kernel, "used_boostability", "instruction '%s' was implicitly nested inside " @@ -1308,19 +1308,19 @@ def generate_loop_outlines_internal( % (boost_insn_id, ", ".join(boost_inames)), DeprecationWarning) - yield sched_state.outline + yield outline_state.outline else: if not allow_boost and allow_boost is not None: # try again with boosting allowed for sub_sched in generate_loop_outlines_internal( - sched_state, + outline_state, allow_boost=True, debug=debug): yield sub_sched else: # dead end if debug is not None: - debug.log_dead_end(sched_state.outline) + debug.log_dead_end(outline_state.outline) # }}} @@ -1888,7 +1888,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) - sched_state = OutlinerState( + outline_state = OutlinerState( kernel=kernel, 
loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( @@ -1948,7 +1948,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): while True: try: for _ in generate_loop_outlines_internal( - sched_state, debug=debug, **outline_gen_kwargs): + outline_state, debug=debug, **outline_gen_kwargs): pass except OutlineDebugInput as e: @@ -1959,7 +1959,7 @@ def generate_loop_outlines_inner(kernel, debug_args={}): try: for gen_sched in generate_loop_outlines_internal( - sched_state, debug=debug, **outline_gen_kwargs): + outline_state, debug=debug, **outline_gen_kwargs): debug.stop() gen_sched = convert_barrier_instructions_to_barriers( -- GitLab From 1b38874f9bae01b2d56748c700cdcdbb1f46d27e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:46:21 -0600 Subject: [PATCH 47/56] renamed sched_idx->outline_idx --- loopy/transform/save.py | 46 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 3c1b06d00..d88d154a4 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -74,18 +74,18 @@ class LivenessAnalysis(object): for idx, (item, next_item) in enumerate(zip( reversed(self.outline), reversed(self.outline + [None]))): - sched_idx = len(self.outline) - idx - 1 + outline_idx = len(self.outline) - idx - 1 # Look at next_item if next_item is None: after = set() elif isinstance(next_item, EnterLoop): # Account for empty loop - loop_end = block_bounds[sched_idx + 1] - after = successors[loop_end] | set([sched_idx + 1]) + loop_end = block_bounds[outline_idx + 1] + after = successors[loop_end] | set([outline_idx + 1]) elif isinstance(next_item, (LeaveLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier)): - after = set([sched_idx + 1]) + after = set([outline_idx + 1]) else: raise LoopyError("unexpected type of outline item: {ty}" .format(ty=type(next_item).__name__)) @@ -93,14 +93,14 @@ class 
LivenessAnalysis(object): # Look at item if isinstance(item, LeaveLoop): # Account for loop - loop_begin = block_bounds[sched_idx] + loop_begin = block_bounds[outline_idx] after |= set([loop_begin]) elif not isinstance(item, (EnterLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier)): raise LoopyError("unexpected type of outline item: {ty}" .format(ty=type(item).__name__)) - successors[sched_idx] = after + successors[outline_idx] = after return successors @@ -108,7 +108,7 @@ class LivenessAnalysis(object): gen = dict((idx, set()) for idx in range(len(self.outline))) kill = dict((idx, set()) for idx in range(len(self.outline))) - for sched_idx, outline_item in enumerate(self.outline): + for outline_idx, outline_item in enumerate(self.outline): if not isinstance(outline_item, RunInstruction): continue insn = self.kernel.id_to_insn[outline_item.insn_id] @@ -117,7 +117,7 @@ class LivenessAnalysis(object): continue if not insn.predicates: # Fully kills the liveness only when unconditional. - kill[sched_idx].add(var) + kill[outline_idx].add(var) if len(self.kernel.temporary_variables[var].shape) > 0: # For an array variable, all definitions generate a use as # well, because the write could be a partial write, @@ -126,11 +126,11 @@ class LivenessAnalysis(object): # We don't currently check if the write is a partial write # or a full write. Instead, we analyze the access # footprint later on to determine how much to reload/save. 
- gen[sched_idx].add(var) + gen[outline_idx].add(var) for var in insn.read_dependency_names(): if var not in self.kernel.temporary_variables: continue - gen[sched_idx].add(var) + gen[outline_idx].add(var) return gen, kill @@ -160,26 +160,26 @@ class LivenessAnalysis(object): def print_liveness(self): print(75 * "-") print("LIVE IN:") - for sched_idx, outline_item in enumerate(self.outline): + for outline_idx, outline_item in enumerate(self.outline): print("{item}: {{{vars}}}".format( - item=sched_idx, - vars=", ".join(sorted(self[sched_idx].live_in)))) + item=outline_idx, + vars=", ".join(sorted(self[outline_idx].live_in)))) print(75 * "-") print("LIVE OUT:") - for sched_idx, outline_item in enumerate(self.outline): + for outline_idx, outline_item in enumerate(self.outline): print("{item}: {{{vars}}}".format( - item=sched_idx, - vars=", ".join(sorted(self[sched_idx].live_out)))) + item=outline_idx, + vars=", ".join(sorted(self[outline_idx].live_out)))) print(75 * "-") - def __getitem__(self, sched_idx): + def __getitem__(self, outline_idx): """ :arg insn: An instruction name or instance of :class:`loopy.instruction.InstructionBase` :returns: A :class:`LivenessResult` associated with `insn` """ - return self.liveness()[sched_idx] + return self.liveness()[outline_idx] # }}} @@ -751,12 +751,12 @@ def save_and_reload_temporaries(knl): from loopy.outline.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - for sched_idx, outline_item in enumerate(knl.outline): + for outline_idx, outline_item in enumerate(knl.outline): if isinstance(outline_item, CallKernel): # Any written temporary that is live-out needs to be read into # memory because of the potential for partial writes. 
- if sched_idx == 0: + if outline_idx == 0: # Kernel entry: nothing live interesting_temporaries = set() else: @@ -765,13 +765,13 @@ def save_and_reload_temporaries(knl): temporaries_read_in_subkernel(knl, subkernel) | temporaries_written_in_subkernel(knl, subkernel)) - for temporary in liveness[sched_idx].live_out & interesting_temporaries: + for temporary in liveness[outline_idx].live_out & interesting_temporaries: logger.info("reloading {0} at entry of {1}" .format(temporary, outline_item.kernel_name)) saver.reload(temporary, outline_item.kernel_name) elif isinstance(outline_item, ReturnFromKernel): - if sched_idx == len(knl.outline) - 1: + if outline_idx == len(knl.outline) - 1: # Kernel exit: nothing live interesting_temporaries = set() else: @@ -779,7 +779,7 @@ def save_and_reload_temporaries(knl): interesting_temporaries = ( temporaries_written_in_subkernel(knl, subkernel)) - for temporary in liveness[sched_idx].live_in & interesting_temporaries: + for temporary in liveness[outline_idx].live_in & interesting_temporaries: logger.info("saving {0} before return of {1}" .format(temporary, outline_item.kernel_name)) saver.save(temporary, outline_item.kernel_name) -- GitLab From c7b1b6cc1dbe6cebcf12e0290ecb87aaf78fa20d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:49:37 -0600 Subject: [PATCH 48/56] renamed sched_index->outline_index --- loopy/check.py | 12 ++++----- loopy/codegen/bounds.py | 8 +++--- loopy/codegen/control.py | 52 +++++++++++++++++++-------------------- loopy/codegen/loop.py | 24 +++++++++--------- loopy/outline/__init__.py | 16 ++++++------ loopy/target/ispc.py | 2 +- 6 files changed, 57 insertions(+), 57 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index e81fbbfed..4957f348f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -681,27 +681,27 @@ def pre_outline_checks(kernel): # {{{ check for unused hw axes -def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): +def 
_check_for_unused_hw_axes_in_kernel_chunk(kernel, outline_index=None): from loopy.outline import (CallKernel, RunInstruction, Barrier, EnterLoop, LeaveLoop, ReturnFromKernel, get_insn_ids_for_block_at, gather_outline_block) - if sched_index is None: + if outline_index is None: group_axes = set() local_axes = set() i = 0 loop_end_i = past_end_i = len(kernel.outline) else: - assert isinstance(kernel.outline[sched_index], CallKernel) - _, past_end_i = gather_outline_block(kernel.outline, sched_index) + assert isinstance(kernel.outline[outline_index], CallKernel) + _, past_end_i = gather_outline_block(kernel.outline, outline_index) group_size, local_size = kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at(kernel.outline, sched_index)) + get_insn_ids_for_block_at(kernel.outline, outline_index)) group_axes = set(ax for ax, length in enumerate(group_size)) local_axes = set(ax for ax, length in enumerate(local_size)) - i = sched_index + 1 + i = outline_index + 1 assert isinstance(kernel.outline[past_end_i - 1], ReturnFromKernel) loop_end_i = past_end_i - 1 diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index c1d05a2ea..c525a94e3 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -55,21 +55,21 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai # {{{ on which inames may a conditional depend? -def get_usable_inames_for_conditional(kernel, sched_index): +def get_usable_inames_for_conditional(kernel, outline_index): from loopy.outline import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, VectorizeTag, IlpBaseTag) - result = find_active_inames_at(kernel, sched_index) - crosses_barrier = has_barrier_within(kernel, sched_index) + result = find_active_inames_at(kernel, outline_index) + crosses_barrier = has_barrier_within(kernel, outline_index) # Find our containing subkernel. 
Grab inames for all insns from there. within_subkernel = False for outline_item_index, outline_item in enumerate( - kernel.outline[:sched_index]): + kernel.outline[:outline_index]): from loopy.outline import CallKernel, ReturnFromKernel if isinstance(outline_item, CallKernel): within_subkernel = True diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 18704da96..b711cf865 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -65,18 +65,18 @@ def synthesize_idis_for_extra_args(kernel, outline_index): return idis -def generate_code_for_sched_index(codegen_state, sched_index): +def generate_code_for_outline_index(codegen_state, outline_index): kernel = codegen_state.kernel - outline_item = kernel.outline[sched_index] + outline_item = kernel.outline[outline_index] if isinstance(outline_item, CallKernel): assert not codegen_state.is_generating_device_code from loopy.outline import (gather_outline_block, get_insn_ids_for_block_at) - _, past_end_i = gather_outline_block(kernel.outline, sched_index) + _, past_end_i = gather_outline_block(kernel.outline, outline_index) assert past_end_i <= codegen_state.outline_index_end - extra_args = synthesize_idis_for_extra_args(kernel, sched_index) + extra_args = synthesize_idis_for_extra_args(kernel, outline_index) new_codegen_state = codegen_state.copy( is_generating_device_code=True, @@ -87,10 +87,10 @@ def generate_code_for_sched_index(codegen_state, sched_index): from loopy.codegen.result import generate_host_or_device_program codegen_result = generate_host_or_device_program( - new_codegen_state, sched_index) + new_codegen_state, outline_index) glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at(kernel.outline, sched_index)) + get_insn_ids_for_block_at(kernel.outline, outline_index)) return merge_codegen_results(codegen_state, [ codegen_result, @@ -126,7 +126,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): "for '%s', tagged '%s'" % 
(outline_item.iname, ", ".join(str(tag) for tag in tags))) - return func(codegen_state, sched_index) + return func(codegen_state, outline_index) elif isinstance(outline_item, Barrier): # {{{ emit barrier code @@ -175,10 +175,10 @@ def generate_code_for_sched_index(codegen_state, sched_index): % type(outline_item)) -def get_required_predicates(kernel, sched_index): +def get_required_predicates(kernel, outline_index): result = None for _, outline_item in generate_sub_outline_items( - kernel.outline, sched_index): + kernel.outline, outline_index): if isinstance(outline_item, Barrier): my_preds = frozenset() elif isinstance(outline_item, RunInstruction): @@ -227,7 +227,7 @@ def build_loop_nest(codegen_state, outline_index): # some work about hoisting conditionals and directly go into recursion. if not codegen_state.ast_builder.can_implement_conditionals: result = [] - inner = generate_code_for_sched_index(codegen_state, outline_index) + inner = generate_code_for_outline_index(codegen_state, outline_index) if inner is not None: result.append(inner) return merge_codegen_results(codegen_state, result) @@ -280,7 +280,7 @@ def build_loop_nest(codegen_state, outline_index): from loopy.outline import find_used_inames_within from loopy.codegen.bounds import get_usable_inames_for_conditional - sched_index_info_entries = [ + outline_index_info_entries = [ OutlineIndexInfo( outline_indices=[i], admissible_cond_inames=( @@ -291,8 +291,8 @@ def build_loop_nest(codegen_state, outline_index): for i in my_sched_indices ] - sched_index_info_entries = group_by( - sched_index_info_entries, + outline_index_info_entries = group_by( + outline_index_info_entries, key=lambda sii: ( sii.admissible_cond_inames, sii.required_predicates, @@ -328,7 +328,7 @@ def build_loop_nest(codegen_state, outline_index): return get_approximate_convex_bounds_checks(domain, check_inames, self.impl_domain) - def build_insn_group(sched_index_info_entries, codegen_state, + def 
build_insn_group(outline_index_info_entries, codegen_state, done_group_lengths=set()): """ :arg done_group_lengths: A set of group lengths (integers) that grows @@ -353,10 +353,10 @@ def build_loop_nest(codegen_state, outline_index): # build_insn_group calls itself for the remainder of outline indices # that were not in the hoist group. - if not sched_index_info_entries: + if not outline_index_info_entries: return [] - origin_si_entry = sched_index_info_entries[0] + origin_si_entry = outline_index_info_entries[0] current_iname_set = origin_si_entry.admissible_cond_inames current_pred_set = (origin_si_entry.required_predicates - codegen_state.implemented_predicates) @@ -372,18 +372,18 @@ def build_loop_nest(codegen_state, outline_index): found_hoists = [] candidate_group_length = 1 - while candidate_group_length <= len(sched_index_info_entries): + while candidate_group_length <= len(outline_index_info_entries): if candidate_group_length in done_group_lengths: candidate_group_length += 1 continue current_iname_set = ( current_iname_set - & sched_index_info_entries[candidate_group_length-1] + & outline_index_info_entries[candidate_group_length-1] .admissible_cond_inames) current_pred_set = ( current_pred_set - & sched_index_info_entries[candidate_group_length-1] + & outline_index_info_entries[candidate_group_length-1] .required_predicates) current_pred_set = frozenset( @@ -395,9 +395,9 @@ def build_loop_nest(codegen_state, outline_index): # And only generate conditionals for those. 
used_inames = set() - for sched_index_info_entry in \ - sched_index_info_entries[0:candidate_group_length]: - used_inames |= sched_index_info_entry.used_inames_within + for outline_index_info_entry in \ + outline_index_info_entries[0:candidate_group_length]: + used_inames |= outline_index_info_entry.used_inames_within # }}} @@ -457,7 +457,7 @@ def build_loop_nest(codegen_state, outline_index): def gen_code(inner_codegen_state): result = [] for i in origin_si_entry.outline_indices: - inner = generate_code_for_sched_index( + inner = generate_code_for_outline_index( inner_codegen_state, i) if inner is not None: @@ -469,7 +469,7 @@ def build_loop_nest(codegen_state, outline_index): # recurse with a bigger done_group_lengths def gen_code(inner_codegen_state): return build_insn_group( - sched_index_info_entries[0:group_length], + outline_index_info_entries[0:group_length], inner_codegen_state, done_group_lengths=( done_group_lengths | set([group_length]))) @@ -519,11 +519,11 @@ def build_loop_nest(codegen_state, outline_index): result = gen_code(new_codegen_state) return result + build_insn_group( - sched_index_info_entries[group_length:], codegen_state) + outline_index_info_entries[group_length:], codegen_state) # }}} - insn_group = build_insn_group(sched_index_info_entries, codegen_state) + insn_group = build_insn_group(outline_index_info_entries, codegen_state) return merge_codegen_results( codegen_state, insn_group) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index fd7bc7b5a..67111659f 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -116,10 +116,10 @@ def get_slab_decomposition(kernel, iname): # {{{ unrolled loops -def generate_unroll_loop(codegen_state, sched_index): +def generate_unroll_loop(codegen_state, outline_index): kernel = codegen_state.kernel - iname = kernel.outline[sched_index].iname + iname = kernel.outline[outline_index].iname bounds = kernel.get_iname_bounds(iname, constants_only=True) @@ -149,7 +149,7 @@ def 
generate_unroll_loop(codegen_state, sched_index): idx_aff = lower_bound_aff + i new_codegen_state = codegen_state.fix(iname, idx_aff) result.append( - build_loop_nest(new_codegen_state, sched_index+1)) + build_loop_nest(new_codegen_state, outline_index+1)) return merge_codegen_results(codegen_state, result) @@ -158,10 +158,10 @@ def generate_unroll_loop(codegen_state, sched_index): # {{{ vectorized loops -def generate_vectorize_loop(codegen_state, sched_index): +def generate_vectorize_loop(codegen_state, outline_index): kernel = codegen_state.kernel - iname = kernel.outline[sched_index].iname + iname = kernel.outline[outline_index].iname bounds = kernel.get_iname_bounds(iname, constants_only=True) @@ -175,7 +175,7 @@ def generate_vectorize_loop(codegen_state, sched_index): warn(kernel, "vec_upper_not_const", "upper bound for vectorized loop '%s' is not a constant, " "cannot vectorize--unrolling instead") - return generate_unroll_loop(codegen_state, sched_index) + return generate_unroll_loop(codegen_state, outline_index) length = int(pw_aff_to_expr(length_aff)) @@ -190,7 +190,7 @@ def generate_vectorize_loop(codegen_state, sched_index): warn(kernel, "vec_lower_not_0", "lower bound for vectorized loop '%s' is not zero, " "cannot vectorize--unrolling instead") - return generate_unroll_loop(codegen_state, sched_index) + return generate_unroll_loop(codegen_state, outline_index) # {{{ 'implement' vectorization bounds @@ -210,7 +210,7 @@ def generate_vectorize_loop(codegen_state, sched_index): length=length, space=length_aff.space)) - return build_loop_nest(new_codegen_state, sched_index+1) + return build_loop_nest(new_codegen_state, outline_index+1) # }}} @@ -343,18 +343,18 @@ def set_up_hw_parallel_loops(codegen_state, outline_index, next_func, # {{{ sequential loop -def generate_sequential_loop_dim_code(codegen_state, sched_index): +def generate_sequential_loop_dim_code(codegen_state, outline_index): kernel = codegen_state.kernel ecm = 
codegen_state.expression_to_code_mapper - loop_iname = kernel.outline[sched_index].iname + loop_iname = kernel.outline[outline_index].iname slabs = get_slab_decomposition(kernel, loop_iname) from loopy.codegen.bounds import get_usable_inames_for_conditional # Note: this does not include loop_iname itself! - usable_inames = get_usable_inames_for_conditional(kernel, sched_index) + usable_inames = get_usable_inames_for_conditional(kernel, outline_index) domain = kernel.get_inames_domain(loop_iname) result = [] @@ -435,7 +435,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): .copy(kernel=intersect_kernel_with_slab( kernel, slab, loop_iname))) - inner = build_loop_nest(new_codegen_state, sched_index+1) + inner = build_loop_nest(new_codegen_state, outline_index+1) # }}} diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index b784c98fa..a65149140 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -157,11 +157,11 @@ def get_insn_ids_for_block_at(outline, start_idx): if isinstance(sub_outline_item, RunInstruction)) -def find_active_inames_at(kernel, sched_index): +def find_active_inames_at(kernel, outline_index): active_inames = [] from loopy.outline import EnterLoop, LeaveLoop - for outline_item in kernel.outline[:sched_index]: + for outline_item in kernel.outline[:outline_index]: if isinstance(outline_item, EnterLoop): active_inames.append(outline_item.iname) if isinstance(outline_item, LeaveLoop): @@ -170,12 +170,12 @@ def find_active_inames_at(kernel, sched_index): return set(active_inames) -def has_barrier_within(kernel, sched_index): - outline_item = kernel.outline[sched_index] +def has_barrier_within(kernel, outline_index): + outline_item = kernel.outline[outline_index] if isinstance(outline_item, BeginBlockItem): loop_contents, _ = gather_outline_block( - kernel.outline, sched_index) + kernel.outline, outline_index) from pytools import any return any(isinstance(suboutline_item, Barrier) for 
suboutline_item in loop_contents) @@ -185,12 +185,12 @@ def has_barrier_within(kernel, sched_index): return False -def find_used_inames_within(kernel, sched_index): - outline_item = kernel.outline[sched_index] +def find_used_inames_within(kernel, outline_index): + outline_item = kernel.outline[outline_index] if isinstance(outline_item, BeginBlockItem): loop_contents, _ = gather_outline_block( - kernel.outline, sched_index) + kernel.outline, outline_index) run_insns = [suboutline_item for suboutline_item in loop_contents if isinstance(suboutline_item, RunInstruction)] diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 01a65b3cb..3b6ccef52 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -302,7 +302,7 @@ class ISPCASTBuilder(CFamilyASTBuilder): else: raise LoopyError("unknown barrier kind") - def get_temporary_decl(self, codegen_state, sched_index, temp_var, decl_info): + def get_temporary_decl(self, codegen_state, outline_index, temp_var, decl_info): from loopy.target.c import POD # uses the correct complex type temp_var_decl = POD(self, decl_info.dtype, decl_info.name) -- GitLab From 31c16eb9166b692d18cc7aaa3faedc890c5e2799 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:50:53 -0600 Subject: [PATCH 49/56] renamed sub_sched->sub_outline --- loopy/outline/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index a65149140..aa62f6ba5 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -949,10 +949,10 @@ def generate_loop_outlines_internal( # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of outliner and see if more progress can be # made. 
- for sub_sched in generate_loop_outlines_internal( + for sub_outline in generate_loop_outlines_internal( new_outline_state, allow_boost=rec_allow_boost, debug=debug): - yield sub_sched + yield sub_outline if not outline_state.group_insn_counts: # No groups: We won't need to backtrack on outlining @@ -1046,7 +1046,7 @@ def generate_loop_outlines_internal( if can_leave and not debug_mode: - for sub_sched in generate_loop_outlines_internal( + for sub_outline in generate_loop_outlines_internal( outline_state.copy( outline=( outline_state.outline @@ -1059,7 +1059,7 @@ def generate_loop_outlines_internal( else outline_state.preoutline[1:]), ), allow_boost=rec_allow_boost, debug=debug): - yield sub_sched + yield sub_outline return @@ -1259,7 +1259,7 @@ def generate_loop_outlines_internal( iname), reverse=True): - for sub_sched in generate_loop_outlines_internal( + for sub_outline in generate_loop_outlines_internal( outline_state.copy( outline=( outline_state.outline @@ -1277,7 +1277,7 @@ def generate_loop_outlines_internal( allow_boost=rec_allow_boost, debug=debug): found_viable_outline = True - yield sub_sched + yield sub_outline if found_viable_outline: return @@ -1313,10 +1313,10 @@ def generate_loop_outlines_internal( else: if not allow_boost and allow_boost is not None: # try again with boosting allowed - for sub_sched in generate_loop_outlines_internal( + for sub_outline in generate_loop_outlines_internal( outline_state, allow_boost=True, debug=debug): - yield sub_sched + yield sub_outline else: # dead end if debug is not None: -- GitLab From c81b0e704d2975e2093bcfc02fecacb5ec81d2ed Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:52:17 -0600 Subject: [PATCH 50/56] renamed sched_indices->outline_indices --- loopy/codegen/control.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index b711cf865..04ceab09a 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py 
@@ -236,7 +236,7 @@ def build_loop_nest(codegen_state, outline_index): # i.e. go up to the next LeaveLoop, and skip over inner loops. - my_sched_indices = [] + my_outline_indices = [] i = outline_index while i < codegen_state.outline_index_end: @@ -245,7 +245,7 @@ def build_loop_nest(codegen_state, outline_index): if isinstance(outline_item, LeaveLoop): break - my_sched_indices.append(i) + my_outline_indices.append(i) if isinstance(outline_item, (EnterLoop, CallKernel)): _, i = gather_outline_block(kernel.outline, i) @@ -288,7 +288,7 @@ def build_loop_nest(codegen_state, outline_index): required_predicates=get_required_predicates(kernel, i), used_inames_within=find_used_inames_within(kernel, i) ) - for i in my_sched_indices + for i in my_outline_indices ] outline_index_info_entries = group_by( -- GitLab From 74d4360c3b2f3905ae3f48709649b031c14db5c0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:53:16 -0600 Subject: [PATCH 51/56] renamed gen_sched->gen_outline --- loopy/outline/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index aa62f6ba5..6ffcbca7d 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1958,28 +1958,28 @@ def generate_loop_outlines_inner(kernel, debug_args={}): break try: - for gen_sched in generate_loop_outlines_internal( + for gen_outline in generate_loop_outlines_internal( outline_state, debug=debug, **outline_gen_kwargs): debug.stop() - gen_sched = convert_barrier_instructions_to_barriers( - kernel, gen_sched) + gen_outline = convert_barrier_instructions_to_barriers( + kernel, gen_outline) gsize, lsize = kernel.get_grid_size_upper_bounds() if (gsize or lsize): if not kernel.options.disable_global_barriers: logger.debug("%s: barrier insertion: global" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, + gen_outline = insert_barriers(kernel, gen_outline, synchronization_kind="global", 
verify_only=True) logger.debug("%s: barrier insertion: local" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, + gen_outline = insert_barriers(kernel, gen_outline, synchronization_kind="local", verify_only=False) logger.debug("%s: barrier insertion: done" % kernel.name) new_kernel = kernel.copy( - outline=gen_sched, + outline=gen_outline, state=KernelState.OUTLINED) from loopy.outline.device_mapping import \ -- GitLab From b20dc9aa45ca93eab16a500f4bf491a13765a146 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:54:17 -0600 Subject: [PATCH 52/56] renamed sched_cache_key->outline_cache_key --- loopy/outline/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index 6ffcbca7d..a3704d1d2 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -2047,12 +2047,12 @@ def get_one_scheduled_kernel(kernel): def get_one_outlined_kernel(kernel): from loopy import CACHING_ENABLED - sched_cache_key = kernel + outline_cache_key = kernel from_cache = False if CACHING_ENABLED: try: - result = outline_cache[sched_cache_key] + result = outline_cache[outline_cache_key] logger.debug("%s: outline cache hit" % kernel.name) from_cache = True @@ -2065,7 +2065,7 @@ def get_one_outlined_kernel(kernel): result = _get_one_outlined_kernel_inner(kernel) if CACHING_ENABLED and not from_cache: - outline_cache.store_if_not_present(sched_cache_key, result) + outline_cache.store_if_not_present(outline_cache_key, result) return result -- GitLab From 5fb78317d64e4a90d4920e846750b724435c5b17 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:55:14 -0600 Subject: [PATCH 53/56] renamed sched->outline --- loopy/outline/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/outline/__init__.py b/loopy/outline/__init__.py index a3704d1d2..9b9bd04b5 100644 --- a/loopy/outline/__init__.py +++ b/loopy/outline/__init__.py @@ -1841,8 
+1841,8 @@ def generate_loop_outlines(kernel, debug_args={}): """ with MinRecursionLimitForOutlining(kernel): - for sched in generate_loop_outlines_inner(kernel, debug_args=debug_args): - yield sched + for outline in generate_loop_outlines_inner(kernel, debug_args=debug_args): + yield outline def generate_loop_outlines_inner(kernel, debug_args={}): -- GitLab From 20bf53f094ad38706f77902c292b9c94b52c9143 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:56:49 -0600 Subject: [PATCH 54/56] renamed nscheditems->n_outline_items --- loopy/transform/save.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index d88d154a4..cee1c65e3 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -55,9 +55,9 @@ class LivenessResult(dict): __slots__ = ["live_in", "live_out"] @classmethod - def make_empty(cls, nscheditems): + def make_empty(cls, n_outline_items): return cls((idx, cls.InstructionResult(live_in=set(), live_out=set())) - for idx in range(nscheditems)) + for idx in range(n_outline_items)) class LivenessAnalysis(object): @@ -765,7 +765,8 @@ def save_and_reload_temporaries(knl): temporaries_read_in_subkernel(knl, subkernel) | temporaries_written_in_subkernel(knl, subkernel)) - for temporary in liveness[outline_idx].live_out & interesting_temporaries: + for temporary in ( + liveness[outline_idx].live_out & interesting_temporaries): logger.info("reloading {0} at entry of {1}" .format(temporary, outline_item.kernel_name)) saver.reload(temporary, outline_item.kernel_name) -- GitLab From 39718c7e9226a7a4a2e6b76b418a31a8415a8e6e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:57:42 -0600 Subject: [PATCH 55/56] renamed ref_sched_kernel->ref_outline_kernel --- loopy/auto_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 111dac092..62f3ee687 100644 --- a/loopy/auto_test.py +++ 
b/loopy/auto_test.py @@ -451,13 +451,13 @@ def auto_test_vs_ref( pp_ref_knl = lp.preprocess_kernel(ref_knl) for knl in lp.generate_loop_outlines(pp_ref_knl): - ref_sched_kernel = knl + ref_outline_kernel = knl break logger.info("%s (ref): trying %s for the reference calculation" % ( ref_knl.name, dev)) - ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel) + ref_compiled = CompiledKernel(ref_ctx, ref_outline_kernel) if not quiet and print_ref_code: print(75*"-") print("Reference Code:") @@ -469,7 +469,7 @@ def auto_test_vs_ref( try: ref_args, ref_arg_data = \ - make_ref_args(ref_sched_kernel, + make_ref_args(ref_outline_kernel, ref_kernel_info.implemented_data_info, ref_queue, parameters) ref_args["out_host"] = False -- GitLab From 36bb7ed3a1ad4a1d9bc628a09a05550657bc6d3c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 24 Feb 2020 08:58:50 -0600 Subject: [PATCH 56/56] grammar fix -> --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index afb7ea6c2..64dfdd8f8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -98,7 +98,7 @@ def prioritize_loops(kernel, loop_priority): Priority is only considered if loop nesting is ambiguous. prioritize_loops can be used multiple times. If you do so, each given - *loop_priority* specifies a outlining constraint. The constraints from + *loop_priority* specifies an outlining constraint. The constraints from all calls to prioritize_loops together establish a partial order on the inames (see https://en.wikipedia.org/wiki/Partially_ordered_set). -- GitLab