diff --git a/examples/python/feature_usage/constrain_loop_nesting.py b/examples/python/feature_usage/constrain_loop_nesting.py new file mode 100644 index 0000000000000000000000000000000000000000..c42b12eafd41aae3237902d8d4ee596de44fbdac --- /dev/null +++ b/examples/python/feature_usage/constrain_loop_nesting.py @@ -0,0 +1,69 @@ +import numpy as np +import loopy as lp +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + +ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k safe. continue + # if dep_insn_iname is concurrent, continue + # (parallel tags don't really nest, so disregard them here) if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag)): - # Parallel tags don't really nest, so we'll disregard - # them here. continue + # if loop_nest_with_map says dep_insn_iname does not nest + # inside or around iname, it must be nested separately; + # we're safe, so continue if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest - # must occur outside. continue + # if at least one of these three cases succeeds for every + # dep_insn_iname, we can add dep_insn to iname's set of insns + # in result dict, otherwise we cannot + may_add_to_loop_dep_map = False break @@ -334,6 +363,8 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): dep_insn=dep_insn_id, insn=insn.id)) + # add dep_insn to result[iname] + # (means dep_insn must be scheduled before entering iname loop) iname_dep.add(dep_insn_id) return result @@ -349,16 +380,17 @@ def group_insn_counts(kernel): return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except(kernel, insn_id, except_insn_ids, + insn_depends_on_graph): + for dep_id in insn_depends_on_graph.get(insn_id, set()): if dep_id in except_insn_ids: continue yield dep_id - for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_insn_ids): + for sub_dep_id in gen_dependencies_except(kernel, dep_id, + except_insn_ids, insn_depends_on_graph): yield sub_dep_id @@ -642,9 +674,10 @@ class SchedulerState(ImmutableRecord): Used to produce warnings about deprecated 'boosting' behavior Should be removed along with boostability in 2017.x. """ + # TODO document insn_depends_on_graph @property - def last_entered_loop(self): + def deepest_active_iname(self): if self.active_inames: return self.active_inames[-1] else: @@ -652,25 +685,36 @@ class SchedulerState(ImmutableRecord): def generate_loop_schedules_internal( - sched_state, allow_boost=False, debug=None): + sched_state, allow_boost=False, debug=None, _depth_ctr=0): + # TODO remove _depth_ctr (just here for debugging) + #_print_depth_ctr = True + _print_depth_ctr = False + # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. 
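# An illustrative, runnable sketch (toy data, hypothetical ids; not part of
# loopy's API) of the traversal pattern gen_dependencies_except uses above
# once dependencies live in an explicit insn_depends_on_graph adjacency dict
# rather than on insn.depends_on:

def _walk_deps_sketch(insn_id, except_ids, dep_graph):
    # Yield direct and transitive dependencies of insn_id, skipping
    # (and not descending past) anything in except_ids.
    for dep_id in dep_graph.get(insn_id, set()):
        if dep_id in except_ids:
            continue
        yield dep_id
        yield from _walk_deps_sketch(dep_id, except_ids, dep_graph)

_toy_graph = {"c": {"b"}, "b": {"a"}}
assert set(_walk_deps_sketch("c", set(), _toy_graph)) == {"a", "b"}
assert set(_walk_deps_sketch("c", {"b"}, _toy_graph)) == set()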
kernel = sched_state.kernel Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa + # TODO ignore boost for now + # {{{ if allow_boost is None: rec_allow_boost = None else: rec_allow_boost = False + # }}} active_inames_set = frozenset(sched_state.active_inames) + # TODO ignore preschedule for now + # {{{ next_preschedule_item = ( sched_state.preschedule[0] if len(sched_state.preschedule) > 0 else None) + # }}} + # TODO ignore debug for now # {{{ decide about debug mode debug_mode = False @@ -708,6 +752,7 @@ def generate_loop_schedules_internal( # }}} + # TODO ignore preschedule for now # {{{ see if we have reached the start/end of kernel in the preschedule if isinstance(next_preschedule_item, CallKernel): @@ -739,12 +784,14 @@ def generate_loop_schedules_internal( # }}} + # TODO ignore preschedule for now # {{{ see if there are pending barriers in the preschedule # Barriers that do not have an originating instruction are handled here. # (These are automatically inserted by insert_barriers().) Barriers with # originating instructions are handled as part of normal instruction # scheduling below. + if ( isinstance(next_preschedule_item, Barrier) and next_preschedule_item.originating_insn_id is None): @@ -782,18 +829,23 @@ def generate_loop_schedules_internal( else: insn_ids_to_try = sched_state.insn_ids_to_try + # TODO ignore preschedule for now + # {{{ insn_ids_to_try.extend( insn_id for item in sched_state.preschedule for insn_id in sched_item_to_insn_id(item)) + # }}} for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids - + # make sure dependees have been scheduled + is_ready = sched_state.insn_depends_on_graph.get( + insn_id, set()) <= sched_state.scheduled_insn_ids if not is_ready: if debug_mode: + # debug message {{{ # These are not that interesting when understanding scheduler # failures. @@ -801,31 +853,42 @@ def generate_loop_schedules_internal( # format_insn(kernel, insn.id), ",".join( # insn.depends_on - sched_state.scheduled_insn_ids))) pass + # }}} continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames - have = active_inames_set - sched_state.parallel_inames + nonconc_insn_inames = kernel.insn_inames(insn) - sched_state.parallel_inames + nonconc_active_inames = active_inames_set - sched_state.parallel_inames # If insn is boostable, it may be placed inside a more deeply # nested loop without harm. - orig_have = have + orig_nonconc_active_inames = nonconc_active_inames + # TODO ignore boost for now + # {{{ if allow_boost: # Note that the inames in 'insn.boostable_into' necessarily won't - # be contained in 'want'. - have = have - insn.boostable_into + # be contained in 'nonconc_insn_inames'. 
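# Toy illustration (hypothetical ids) of the is_ready test above: with
# dependencies kept in an adjacency dict, readiness is a plain subset
# check against the set of already-scheduled ids:
_dep_graph = {"store": {"compute"}, "compute": {"load"}}
_scheduled = {"load"}
assert _dep_graph.get("compute", set()) <= _scheduled       # ready
assert not _dep_graph.get("store", set()) <= _scheduled     # blocked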
+ nonconc_active_inames = nonconc_active_inames - insn.boostable_into + # }}} - if want != have: + if nonconc_insn_inames != nonconc_active_inames: + # We don't have the inames we need, may need to open more loops is_ready = False + # TODO ignore debug for now + # debug message {{{ if debug_mode: - if want-have: + if nonconc_insn_inames-nonconc_active_inames: print("instruction '%s' is missing inames '%s'" - % (format_insn(kernel, insn.id), ",".join(want-have))) - if have-want: + % (format_insn(kernel, insn.id), + ",".join(nonconc_insn_inames-nonconc_active_inames))) + if nonconc_active_inames-nonconc_insn_inames: print("instruction '%s' won't work under inames '%s'" - % (format_insn(kernel, insn.id), ",".join(have-want))) + % (format_insn(kernel, insn.id), + ",".join(nonconc_active_inames-nonconc_insn_inames))) + # }}} + # TODO ignore preschedule for now # {{{ check if scheduling this insn is compatible with preschedule if insn_id in sched_state.prescheduled_insn_ids: @@ -845,50 +908,106 @@ def generate_loop_schedules_internal( # }}} + # TODO ignoring global barriers for now + # {{{ if global barrier, is it allowed?, if not, we must be within subkernel + # to schedule insn (any kernel that does not have subkernels) + # {{{ check if scheduler state allows insn scheduling from loopy.kernel.instruction import BarrierInstruction + # TODO (?)could save some time by skipping ahead if we know is_ready=False if isinstance(insn, BarrierInstruction) and \ insn.synchronization_kind == "global": if not sched_state.may_schedule_global_barriers: + # debug message {{{ if debug_mode: print("can't schedule '%s' because global barriers are " "not currently allowed" % format_insn(kernel, insn.id)) + # }}} is_ready = False else: if not sched_state.within_subkernel: + # debug message {{{ if debug_mode: print("can't schedule '%s' because not within subkernel" % format_insn(kernel, insn.id)) + # }}} is_ready = False - + # }}} # }}} + # TODO ignore insn groups for now # {{{ determine group-based readiness if insn.conflicts_with_groups & active_groups: is_ready = False + # debug message {{{ if debug_mode: print("instruction '%s' conflicts with active group(s) '%s'" % (insn.id, ",".join( active_groups & insn.conflicts_with_groups))) + # }}} # }}} - # {{{ determine reachability + # {{{ determine reachability (no active inames conflict w/insn, but + # may need more inames) - if (not is_ready and have <= want): + if (not is_ready and nonconc_active_inames <= nonconc_insn_inames): + # no active inames conflict with insn, but we may need more active inames reachable_insn_ids.add(insn_id) # }}} + # {{{ is_ready debug message if is_ready and debug_mode: print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + # }}} + + # {{{ check to see if adding insn_id violates dependencies 2.0 + + # REQUIRES schedule.checker (to be renamed to linearization.checker) + if is_ready: + from loopy.schedule.checker import check_linearization_validity + + # get IDs of insns that will have been scheduled if we schedule insn + # TODO (For now, ignoring barriers) + hypothetical_scheduled_ids = set( + [item.insn_id for item in sched_state.schedule + if isinstance(item, RunInstruction)] + + [insn.id, ]) + + # get subset of dependencies to check + # (deps s.t. 
before+after insn have been scheduled) + relevant_deps = set() + #for statement_pair_dep_set in kernel.dependencies: + for insn_id_before, insn_id_after, constraint_map in kernel.dependencies: + # TODO update after dep refactoring + if (insn_id_before in hypothetical_scheduled_ids + and insn_id_after in hypothetical_scheduled_ids): + relevant_deps.add( + (insn_id_before, insn_id_after, constraint_map)) + + # make sure currently scheduled items don't violate deps + if relevant_deps: + schedule_items = sched_state.schedule[:] + ( + RunInstruction(insn_id=insn.id), ) + sched_supports_deps = check_linearization_validity( + kernel, + relevant_deps, + schedule_items) + + if not sched_supports_deps: + is_ready = False + # }}} if is_ready and not debug_mode: + # schedule this instruction and recurse iid_set = frozenset([insn.id]) + # TODO ignore insn groups for now: + # new_active_group_counts = sched_state.active_group_counts # {{{ update active group counts for added instruction if insn.groups: @@ -914,17 +1033,23 @@ def generate_loop_schedules_internal( new_insn_ids_to_try.remove(insn.id) # invalidate instruction_ids_to_try when active group changes + # TODO ignore insn groups for now: + # {{{ if set(new_active_group_counts.keys()) != set( sched_state.active_group_counts.keys()): new_insn_ids_to_try = None + # }}} # }}} new_uses_of_boostability = [] + # TODO ignore boost for now + # {{{ if allow_boost: - if orig_have & insn.boostable_into: + if orig_nonconc_active_inames & insn.boostable_into: new_uses_of_boostability.append( - (insn.id, orig_have & insn.boostable_into)) + (insn.id, orig_nonconc_active_inames & insn.boostable_into)) + # }}} new_sched_state = sched_state.copy( scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, @@ -957,44 +1082,59 @@ def generate_loop_schedules_internal( # }}} + # No insns are ready to be scheduled now, but some may be reachable + # reachable_insn_ids = no active inames conflict w/insn, but may need more inames + # {{{ see if we're ready to leave the innermost loop - last_entered_loop = sched_state.last_entered_loop + deepest_active_iname = sched_state.deepest_active_iname - if last_entered_loop is not None: + if deepest_active_iname is not None: can_leave = True + # TODO ignore preschedule for now + # {{{ if ( - last_entered_loop in sched_state.prescheduled_inames + deepest_active_iname in sched_state.prescheduled_inames and not ( isinstance(next_preschedule_item, LeaveLoop) - and next_preschedule_item.iname == last_entered_loop)): + and next_preschedule_item.iname == deepest_active_iname)): # A prescheduled loop can only be left if the preschedule agrees. + # debug message {{{ if debug_mode: print("cannot leave '%s' because of preschedule constraints" - % last_entered_loop) + % deepest_active_iname) + # }}} can_leave = False - elif last_entered_loop not in sched_state.breakable_inames: + # }}} + elif deepest_active_iname not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. 
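# Toy version (hypothetical ids; constraint maps elided as None) of the
# relevant-deps filter above: a dependency triple only needs checking
# once both of its endpoints appear in the hypothetical schedule:
_deps = {("a", "b", None), ("b", "c", None)}
_hyp_scheduled = {"a", "b"}
_relevant = {(pre, post, m) for pre, post, m in _deps
        if pre in _hyp_scheduled and post in _hyp_scheduled}
assert _relevant == {("a", "b", None)}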
for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in kernel.insn_inames(insn): + if deepest_active_iname in kernel.insn_inames(insn): + # cannot leave deepest_active_iname; insn still depends on it + # TODO ignore debug for now + # {{{ if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (deepest_active_iname, format_insn(kernel, insn.id))) # check if there's a dependency of insn that needs to be - # outside of last_entered_loop. - for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + # outside of deepest_active_iname. + for subdep_id in gen_dependencies_except( + kernel, insn_id, + sched_state.scheduled_insn_ids, + sched_state.insn_depends_on_graph): subdep = kernel.id_to_insn[insn_id] - want = (kernel.insn_inames(subdep_id) + nonconc_insn_inames = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( - last_entered_loop not in want and - last_entered_loop not in subdep.boostable_into): + deepest_active_iname not in nonconc_insn_inames + and + deepest_active_iname not in subdep.boostable_into + ): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " @@ -1008,12 +1148,13 @@ def generate_loop_schedules_internal( % { "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, - "iname": last_entered_loop, + "iname": deepest_active_iname, "subdep": format_insn_id(kernel, subdep_id), "dep": format_insn_id(kernel, insn_id), "subdep_i": format_insn(kernel, subdep_id), "dep_i": format_insn(kernel, insn_id), }) + # }}} can_leave = False break @@ -1035,22 +1176,52 @@ def generate_loop_schedules_internal( if ignore_count: ignore_count -= 1 else: - assert sched_item.iname == last_entered_loop + assert sched_item.iname == deepest_active_iname if seen_an_insn: can_leave = True break + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if can_leave: + must_nest_graph = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + + if must_nest_graph: + # get inames that must nest inside the current iname + must_nest_inside = must_nest_graph[deepest_active_iname] + + if must_nest_inside: + # get scheduled inames that are nested inside current iname + encountered_iname = False + actually_nested_inside = set() + for sched_item in sched_state.schedule: + if isinstance(sched_item, EnterLoop): + if encountered_iname: + actually_nested_inside.add(sched_item.iname) + elif sched_item.iname == deepest_active_iname: + encountered_iname = True + elif (isinstance(sched_item, LeaveLoop) and + sched_item.iname == deepest_active_iname): + break + + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if not must_nest_inside.issubset(actually_nested_inside): + can_leave = False + if can_leave and not debug_mode: for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + + (LeaveLoop(iname=deepest_active_iname),)), active_inames=sched_state.active_inames[:-1], preschedule=( sched_state.preschedule - if last_entered_loop + if deepest_active_iname not in sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), @@ -1061,23 +1232,28 @@ def generate_loop_schedules_internal( # }}} + # We're not ready to leave the innermost loop... 
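# Runnable sketch of the schedule scan above, with namedtuple stand-ins
# for loopy's EnterLoop/LeaveLoop items: collect the inames opened
# between EnterLoop(iname) and LeaveLoop(iname) so the must-nest check
# can compare them against must_nest_inside.
from collections import namedtuple
_Enter = namedtuple("_Enter", "iname")
_Leave = namedtuple("_Leave", "iname")

def _nested_inside_sketch(schedule, iname):
    seen_iname = False
    nested = set()
    for item in schedule:
        if isinstance(item, _Enter):
            if seen_iname:
                nested.add(item.iname)
            elif item.iname == iname:
                seen_iname = True
        elif isinstance(item, _Leave) and item.iname == iname:
            break
    return nested

_sched = [_Enter("i"), _Enter("j"), _Leave("j"), _Leave("i")]
assert _nested_inside_sketch(_sched, "i") == {"j"}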
+ # {{{ see if any loop can be entered now # Find inames that are being referenced by as yet unscheduled instructions. - needed_inames = set() + unsched_insn_inames_nonconc_still_needed = set() for insn_id in sched_state.unscheduled_insn_ids: - needed_inames.update(kernel.insn_inames(insn_id)) + unsched_insn_inames_nonconc_still_needed.update(kernel.insn_inames(insn_id)) - needed_inames = (needed_inames + unsched_insn_inames_nonconc_still_needed = ( + unsched_insn_inames_nonconc_still_needed # There's no notion of 'entering' a parallel loop - sched_state.parallel_inames - # Don't reenter a loop we're already in. - active_inames_set) + # {{{ debug msg + if debug_mode: print(75*"-") - print("inames still needed :", ",".join(needed_inames)) + print("inames still needed :", + ",".join(unsched_insn_inames_nonconc_still_needed)) print("active inames :", ",".join(sched_state.active_inames)) print("inames entered so far :", ",".join(sched_state.entered_inames)) print("reachable insns:", ",".join(reachable_insn_ids)) @@ -1086,13 +1262,24 @@ def generate_loop_schedules_internal( for grp, c in six.iteritems(sched_state.active_group_counts))) print(75*"-") - if needed_inames: + # }}} + + if unsched_insn_inames_nonconc_still_needed: iname_to_usefulness = {} - for iname in needed_inames: + currently_accessible_inames = ( + active_inames_set | sched_state.parallel_inames) + + for iname in unsched_insn_inames_nonconc_still_needed: - # {{{ check if scheduling this iname now is allowed/plausible + # check if scheduling this iname now is allowed/plausible based on + # preschedule constraints, loop_nest_around_map, + # loop insn dependency map, and data dependencies, + # if not, continue + # {{{ check if scheduling this iname now is allowed/plausible based on ^ + # TODO ignore preschedule for now + # {{{ if ( iname in sched_state.prescheduled_inames and not ( @@ -1103,18 +1290,28 @@ def generate_loop_schedules_internal( % iname) continue - currently_accessible_inames = ( - active_inames_set | sched_state.parallel_inames) + # }}} + + # check loop_nest_around_map to determine whether inames that must + # nest around iname are available + # {{{ if ( not sched_state.loop_nest_around_map[iname] <= currently_accessible_inames): if debug_mode: print("scheduling %s prohibited by loop nest-around map" % iname) continue + # }}} + # loop_insn_dep_map: dict mapping inames to other insn ids that need to + # be scheduled before the iname should be eligible for scheduling. + # {{{ if loop dependency map prohibits scheduling of iname, continue if ( not sched_state.loop_insn_dep_map.get(iname, set()) <= sched_state.scheduled_insn_ids): + # scheduling {iname} prohibited by loop dependency map + # (needs '{needed_insns})' + # debug message {{{ if debug_mode: print( "scheduling {iname} prohibited by loop dependency map " @@ -1125,8 +1322,10 @@ def generate_loop_schedules_internal( sched_state.loop_insn_dep_map.get(iname, set()) - sched_state.scheduled_insn_ids))) + # }}} continue + # }}} iname_home_domain = kernel.domains[kernel.get_home_domain_index(iname)] from islpy import dim_type @@ -1142,6 +1341,8 @@ def generate_loop_schedules_internal( # Check if any parameters are temporary variables, and if so, if their # writes have already been scheduled. 
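# Toy version (made-up insn ids and inames) of the "inames still needed"
# computation above: union the inames of all unscheduled insns, then
# drop concurrent inames (never "entered") and already-active inames:
_insn_inames = {"a": {"i", "j"}, "b": {"i", "k"}}
_unscheduled = {"a", "b"}
_parallel, _active = {"k"}, {"i"}
_still_needed = (set().union(*(_insn_inames[i] for i in _unscheduled))
        - _parallel - _active)
assert _still_needed == {"j"}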
+ # TODO ignore data dependency for now + # {{{ data_dep_written = True for domain_par in ( iname_home_domain_params @@ -1155,130 +1356,168 @@ def generate_loop_schedules_internal( "parameter '%s' is not yet available" % (iname, domain_par)) break + # }}} if not data_dep_written: continue # }}} - # {{{ determine if that gets us closer to being able to schedule an insn + # so far, scheduling of iname is allowed/plausible + + # {{{ does entering iname get us closer to scheduling an insn? usefulness = None # highest insn priority enabled by iname + # suppose we were to activate this iname... + # would that get us closer to scheduling an insn? hypothetically_active_loops = active_inames_set | set([iname]) + # reachable_insn_ids = + # no active inames conflict w/insn, but may need more inames for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - want = kernel.insn_inames(insn) | insn.boostable_into + inames_wanted_for_insn = ( + kernel.insn_inames(insn) | insn.boostable_into) - if hypothetically_active_loops <= want: + if hypothetically_active_loops <= inames_wanted_for_insn: if usefulness is None: usefulness = insn.priority else: usefulness = max(usefulness, insn.priority) if usefulness is None: + # {{{ iname won't get us closer to scheduling insn; debug msg, cont. if debug_mode: print("iname '%s' deemed not useful" % iname) continue + # }}} iname_to_usefulness[iname] = usefulness # }}} + # iname_to_usefulness.keys: inames that get us closer to scheduling an insn + # {{{ tier building - # Build priority tiers. If a schedule is found in the first tier, then - # loops in the second are not even tried (and so on). - loop_priority_set = set().union(*[set(prio) - for prio in - sched_state.kernel.loop_priority]) + # inames not yet entered that would get us closer to scheduling an insn: useful_loops_set = set(six.iterkeys(iname_to_usefulness)) - useful_and_desired = useful_loops_set & loop_priority_set + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"tier building"+"."*60) + print( + " "*_depth_ctr+"useful inames including ilp:", + useful_loops_set + ) - if useful_and_desired: - wanted = ( - useful_and_desired - - sched_state.ilp_inames - - sched_state.vec_inames + from loopy.transform.iname import ( + check_all_must_not_nests, + ) + from loopy.tools import ( + get_graph_sources, + ) + from pytools.graph import compute_induced_subgraph + + # since vec_inames must be innermost, + # they are not valid canidates unless only vec_inames remain + if useful_loops_set - sched_state.vec_inames: + useful_loops_set -= sched_state.vec_inames + + # to enter an iname without violating must_nest constraints, + # iname must be a source in the induced subgraph of must_nest_graph + # containing inames in useful_loops_set + must_nest_graph_full = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + if must_nest_graph_full: + must_nest_graph_useful = compute_induced_subgraph( + must_nest_graph_full, + useful_loops_set ) - priority_tiers = [t for t in - get_priority_tiers(wanted, - sched_state.kernel.loop_priority - ) - ] - - # Update the loop priority set, because some constraints may have - # have been contradictary. 
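# Toy stand-ins (not loopy's implementations) for the two graph helpers
# this hunk relies on, pytools.graph.compute_induced_subgraph and
# loopy.tools.get_graph_sources, applied to a must-nest graph in
# adjacency-dict form (iname -> inames that must nest inside it). A
# valid next loop to open is a source of the subgraph induced on the
# useful inames, i.e. an iname that no other useful iname must contain:

def _induced_subgraph_sketch(graph, nodes):
    # keep only the given nodes, and only the edges among them
    return {n: graph.get(n, set()) & nodes for n in nodes}

def _graph_sources_sketch(graph):
    # nodes with no incoming edge
    non_sources = set().union(*graph.values()) if graph else set()
    return set(graph) - non_sources

_must_nest = {"i": {"j", "k"}, "j": {"k"}, "k": set()}
assert _graph_sources_sketch(
        _induced_subgraph_sketch(_must_nest, {"j", "k"})) == {"j"}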
- loop_priority_set = set().union(*[set(t) for t in priority_tiers]) - - priority_tiers.append( - useful_loops_set - - loop_priority_set - - sched_state.ilp_inames - - sched_state.vec_inames - ) + source_inames = get_graph_sources(must_nest_graph_useful) + else: + source_inames = useful_loops_set + + # since graph has a key for every iname, + # sources should be the only valid iname candidates + + # check whether entering any source_inames violates + # must-not-nest constraints + must_not_nest_constraints = ( + sched_state.kernel.loop_nest_constraints.must_not_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_not_nest_constraints: + next_iname_candidates = set() + for next_iname in source_inames: + iname_orders_to_check = [ + (active_iname, next_iname) + for active_iname in active_inames_set] + + if check_all_must_not_nests( + iname_orders_to_check, must_not_nest_constraints): + next_iname_candidates.add(next_iname) else: - priority_tiers = [ - useful_loops_set - - sched_state.ilp_inames - - sched_state.vec_inames - ] - - # vectorization must be the absolute innermost loop - priority_tiers.extend([ - [iname] - for iname in sched_state.ilp_inames - if iname in useful_loops_set - ]) - - priority_tiers.extend([ - [iname] - for iname in sched_state.vec_inames - if iname in useful_loops_set - ]) + next_iname_candidates = source_inames + + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"TIERS INIT ======================================") + _depth_ctr += 1 + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"sources:", next_iname_candidates) # }}} if debug_mode: print("useful inames: %s" % ",".join(useful_loops_set)) else: - for tier in priority_tiers: - found_viable_schedule = False - - for iname in sorted(tier, - key=lambda iname: ( - iname_to_usefulness.get(iname, 0), - # Sort by iname to achieve deterministic - # ordering of generated schedules. - iname), - reverse=True): - - for sub_sched in generate_loop_schedules_internal( - sched_state.copy( - schedule=( - sched_state.schedule - + (EnterLoop(iname=iname),)), - active_inames=( - sched_state.active_inames + (iname,)), - entered_inames=( - sched_state.entered_inames - | frozenset((iname,))), - preschedule=( - sched_state.preschedule - if iname not in sched_state.prescheduled_inames - else sched_state.preschedule[1:]), - ), - allow_boost=rec_allow_boost, - debug=debug): - found_viable_schedule = True - yield sub_sched - - if found_viable_schedule: - return + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"LOOP OVER CANDIDATES ------------------------") + print( + " "*_depth_ctr+"loop over these candidates:", + next_iname_candidates) + found_viable_schedule = False + + # loop over iname candidates; enter inames and recurse: + for iname in sorted(next_iname_candidates, + key=lambda iname: ( + iname_to_usefulness.get(iname, 0), + # Sort by iname to achieve deterministic + # ordering of generated schedules. 
+ iname), + reverse=True): + if _print_depth_ctr: # TODO remove + print(" "*(_depth_ctr+1)+"loop over iname candidates:", iname) + + # enter the loop and recurse + for sub_sched in generate_loop_schedules_internal( + sched_state.copy( + schedule=( + sched_state.schedule + + (EnterLoop(iname=iname),)), + active_inames=( + sched_state.active_inames + (iname,)), + entered_inames=( + sched_state.entered_inames + | frozenset((iname,))), + preschedule=( + sched_state.preschedule + if iname not in sched_state.prescheduled_inames + else sched_state.preschedule[1:]), + ), + allow_boost=rec_allow_boost, + debug=debug, + _depth_ctr=_depth_ctr): + + found_viable_schedule = True + yield sub_sched + # TODO what happened if found_viable_schedule is false? + if found_viable_schedule: + return + _depth_ctr -= 1 # }}} + # debug instructions for user {{{ if debug_mode: print(75*"=") inp = six.moves.input("Hit Enter for next schedule, " @@ -1286,14 +1525,37 @@ def generate_loop_schedules_internal( "different length:") if inp: raise ScheduleDebugInput(inp) + # }}} + + # make sure must_nest_constraints satisfied + # (the check above avoids contradicting some must_nest constraints, + # but we don't know if all required nestings are present) + # TODO is this the only place we need to check all must_nest constraints? + from loopy.transform.iname import ( + get_iname_nestings, + is_loop_nesting_valid, + ) + must_nest_constraints = (sched_state.kernel.loop_nest_constraints.must_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_nest_constraints: + sched_tiers = get_iname_nestings(sched_state.schedule) + must_constraints_satisfied = is_loop_nesting_valid( + sched_tiers, must_nest_constraints, + must_not_nest_constraints=None, # (checked upon loop creation) + all_inames=kernel.all_inames()) + else: + must_constraints_satisfied = True if ( not sched_state.active_inames and not sched_state.unscheduled_insn_ids - and not sched_state.preschedule): + and not sched_state.preschedule + and must_constraints_satisfied): # if done, yield result debug.log_success(sched_state.schedule) + # TODO ignore boost for now + # {{{ for boost_insn_id, boost_inames in sched_state.uses_of_boostability: warn_with_kernel( kernel, "used_boostability", @@ -1302,6 +1564,7 @@ def generate_loop_schedules_internal( "This is deprecated and will stop working in loopy 2017.x." % (boost_insn_id, ", ".join(boost_inames)), DeprecationWarning) + # }}} yield sched_state.schedule @@ -1311,6 +1574,7 @@ def generate_loop_schedules_internal( for sub_sched in generate_loop_schedules_internal( sched_state, allow_boost=True, debug=debug): + # TODO check to make sure must_nest constraints satisfied? yield sub_sched else: # dead end @@ -1848,6 +2112,29 @@ def generate_loop_schedules_inner(kernel, debug_args={}): from loopy.check import pre_schedule_checks pre_schedule_checks(kernel) + # make sure legacy dependencies have become contemporary dependencies + # TODO move this to proper location and attach deps to individual stmts + # TODO update after dep refactoring + if any(insn.depends_on for insn in kernel.instructions): + warn_with_kernel( + kernel, "legacy_dependencies_found", + "Legacy dependencies found in kernel, creating " + "corresponding new dependencies before scheduling. 
" + "This may also be accomplished with the following script:\n\n" + "from loopy.schedule.checker import " + "create_dependencies_from_legacy_knl\n" + "deps = create_dependencies_from_legacy_knl(knl)\n" + "knl = lp.add_dependencies_v2(knl, deps)\n\n" + ) + from loopy.schedule.checker import ( + create_dependencies_from_legacy_knl, + ) + from loopy.transform.instruction import ( + add_dependencies_v2, + ) + deps = create_dependencies_from_legacy_knl(kernel) + kernel = add_dependencies_v2(kernel, deps) + schedule_count = 0 debug = ScheduleDebugger(**debug_args) @@ -1881,13 +2168,44 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) + + # {{{ create dependency graph with edges from depender* to dependee* + # iff intersection (SAME_map & DEP_map) is not empty + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + from loopy.schedule.checker.utils import ( + create_graph_from_pairs, + get_concurrent_inames, + ) + + _, non_conc_inames = get_concurrent_inames(kernel) + legacy_deps_filtered_by_same = filter_deps_by_intersection_with_SAME( + kernel, + kernel.dependencies, # TODO update after dependency refactoring + non_conc_inames, + ) + + # get dep graph edges with edges from depender->dependee + dep_graph_pairs = [ + (insn_id_before, insn_id_after) + for insn_id_before, insn_id_after, _ in legacy_deps_filtered_by_same] + + # create dep graph from edges + insn_depends_on_graph = create_graph_from_pairs(dep_graph_pairs) + + # }}} + sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), + loop_nest_around_map=loop_nest_around_map, + insn_depends_on_graph=insn_depends_on_graph), + insn_depends_on_graph=insn_depends_on_graph, breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, @@ -2015,7 +2333,7 @@ schedule_cache = WriteOncePersistentDict( key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel): +def _get_one_scheduled_kernel_inner(kernel, debug_args={}): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2025,19 +2343,21 @@ def _get_one_scheduled_kernel_inner(kernel): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. - return next(iter(generate_loop_schedules(kernel))) + return next(iter( + generate_loop_schedules(kernel, debug_args=debug_args))) -def get_one_scheduled_kernel(kernel): +def get_one_scheduled_kernel(kernel, debug_args={}): warn_with_kernel( kernel, "get_one_scheduled_kernel_deprecated", "get_one_scheduled_kernel is deprecated. 
" "Use get_one_linearized_kernel instead.", DeprecationWarning) - return get_one_linearized_kernel(kernel) + return get_one_linearized_kernel(kernel, debug_args) + +def get_one_linearized_kernel(kernel, debug_args={}): -def get_one_linearized_kernel(kernel): from loopy import CACHING_ENABLED sched_cache_key = kernel @@ -2055,7 +2375,8 @@ def get_one_linearized_kernel(kernel): if not from_cache: with ProcessLogger(logger, "%s: schedule" % kernel.name): with MinRecursionLimitForScheduling(kernel): - result = _get_one_scheduled_kernel_inner(kernel) + result = _get_one_scheduled_kernel_inner( + kernel, debug_args=debug_args) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 716a0cb58cc4e6ecddbbc3231583d9ddc2a9ef5a..aa1e671616013ed678b5b3adc33e22829466de5f 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -148,3 +148,231 @@ def get_schedule_for_statement_pair( # }}} # }}} + + +def create_dependencies_from_legacy_knl(knl): + """Return a list of + :class:`loopy.schedule.checker.dependency.TBD` + instances created for a :class:`loopy.LoopKernel` containing legacy + depencencies. + + Create the new dependencies according to the following rules: + + (1) If a dependency exists between ``insn0`` and ``insn1``, create the + dependnecy ``SAME(SNC)`` where ``SNC`` is the set of non-concurrent inames + used by both ``insn0`` and ``insn1``, and ``SAME`` is the relationship + specified by the ``SAME`` attribute of + :class:`loopy.schedule.checker.dependency.DependencyType`. + + (2) For each subset of non-concurrent inames used by any instruction, + + (a), find the set of all instructions using those inames, + + (b), create a directed graph with these instructions as nodes and + edges representing a 'happens before' relationship specfied by + each dependency, + + (c), find the sources and sinks within this graph, and + + (d), connect each sink to each source (sink happens before source) + with a ``PRIOR(SNC)`` dependency, where ``PRIOR`` is the + relationship specified by the ``PRIOR`` attribute of + :class:`loopy.schedule.checker.dependency.DependencyType`. 
+ + """ + + from loopy.schedule.checker.dependency import ( + create_dependency_constraint, + get_dependency_sources_and_sinks, + StatementPairDependencySet, + DependencyType as dt, + ) + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + get_all_nonconcurrent_insn_iname_subsets, + get_linearization_item_ids_within_inames, + ) + from loopy.schedule.checker.schedule import StatementRef + + # Preprocess if not already preprocessed + # note: kernels must always be preprocessed before scheduling + from loopy import preprocess_kernel + preprocessed_knl = preprocess_kernel(knl) + + # Create StatementPairDependencySet(s) from kernel dependencies + spds = set() + + # Introduce SAME dep for set of shared, non-concurrent inames + + conc_inames, non_conc_inames = get_concurrent_inames(preprocessed_knl) + for insn_after in preprocessed_knl.instructions: + for insn_before_id in insn_after.depends_on: + insn_before = preprocessed_knl.id_to_insn[insn_before_id] + insn_before_inames = insn_before.within_inames + insn_after_inames = insn_after.within_inames + shared_inames = insn_before_inames & insn_after_inames + shared_non_conc_inames = shared_inames & non_conc_inames + + spds.add( + StatementPairDependencySet( + StatementRef(insn_id=insn_before.id), + StatementRef(insn_id=insn_after.id), + {dt.SAME: shared_non_conc_inames}, + preprocessed_knl.get_inames_domain(insn_before_inames), + preprocessed_knl.get_inames_domain(insn_after_inames), + )) + + # loop-carried deps ------------------------------------------ + + # Go through insns and get all unique insn.depends_on iname sets + non_conc_iname_subsets = get_all_nonconcurrent_insn_iname_subsets( + preprocessed_knl, exclude_empty=True, non_conc_inames=non_conc_inames) + + # For each set of insns within a given iname set, find sources and sinks. 
+ # Then make PRIOR dep from all sinks to all sources at previous iterations + for iname_subset in non_conc_iname_subsets: + # find items within this iname set + linearization_item_ids = get_linearization_item_ids_within_inames( + preprocessed_knl, iname_subset) + + # find sources and sinks + sources, sinks = get_dependency_sources_and_sinks( + preprocessed_knl, linearization_item_ids) + + # create prior deps + + # in future, consider inserting single no-op source and sink + for source_id in sources: + for sink_id in sinks: + sink_insn_inames = preprocessed_knl.id_to_insn[sink_id].within_inames + source_insn_inames = preprocessed_knl.id_to_insn[source_id].within_inames + shared_inames = sink_insn_inames & source_insn_inames + shared_non_conc_inames = shared_inames & non_conc_inames + + spds.add( + StatementPairDependencySet( + StatementRef(insn_id=sink_id), + StatementRef(insn_id=source_id), + {dt.PRIOR: shared_non_conc_inames}, + preprocessed_knl.get_inames_domain(sink_insn_inames), + preprocessed_knl.get_inames_domain(source_insn_inames), + )) + + dep_maps = set() + for statement_pair_dep_set in spds: + # create a map representing constraints from the dependency, + # which maps statement instance to all stmt instances that must occur later + # and is acquired from the non-preprocessed kernel + constraint_map = create_dependency_constraint( + statement_pair_dep_set, + knl.loop_priority, + ) + + dep_maps.add(( + statement_pair_dep_set.statement_before.insn_id, + statement_pair_dep_set.statement_after.insn_id, + constraint_map, + )) + + return frozenset(dep_maps) + + +def check_linearization_validity( + knl, + dep_maps, + linearization_items, + ): + # TODO document + + from loopy.schedule.checker.dependency import ( + create_dependency_constraint, + ) + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + ) + from loopy.schedule.checker.utils import ( + prettier_map_string, + ) + + # Preprocess if not already preprocessed + # note: kernels must always be preprocessed before scheduling + from loopy import preprocess_kernel + preprocessed_knl = preprocess_kernel(knl) + + # For each dependency, create+test linearization containing pair of insns------ + linearization_is_valid = True + #for statement_pair_dep_set in statement_pair_dep_sets: + for insn_id_before, insn_id_after, constraint_map in dep_maps: + # TODO, since we now get the doms inside + # build_maps() + # reconsider the content of statement_pair_dep_set, which + # currently contains doms(do we still want them there?) 
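# To make the "constraint map" being checked here concrete, a minimal
# runnable sketch (assuming islpy, which loopy already uses; primed
# variables are spelled with a "p" suffix to keep isl's parser happy):
# a SAME(i) dependency between statements 0 and 1 maps each instance of
# the before-statement to the after-statement instances that must come
# later.
import islpy as isl
_same_i = isl.Map(
    "{ [sp, ip] -> [s, i] : sp = 0 and s = 1 and ip = i and 0 <= i < 8 }")
# instance (stmt 0, i=3) must precede instance (stmt 1, i=3):
assert not _same_i.intersect(isl.Map("{ [0, 3] -> [1, 3] }")).is_empty()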
+ + # Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} + # include only instructions involved in this dependency + sched_builder = get_schedule_for_statement_pair( + preprocessed_knl, + linearization_items, + insn_id_before, + insn_id_after, + ) + + # Get two isl maps from the PairwiseScheduleBuilder, + # one for each linearization item involved in the dependency; + isl_sched_map_before, isl_sched_map_after = sched_builder.build_maps( + preprocessed_knl) + + # get map representing lexicographic ordering + sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space() + + # create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + ) + + # reorder variables/params in constraint map space to match SIO so we can + # check to see whether the constraint map is a subset of the SIO + # (spaces must be aligned so that the variables in the constraint map + # correspond to the same variables in the SIO) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + + aligned_constraint_map = ensure_dim_names_match_and_align( + constraint_map, sio) + + import islpy as isl + assert aligned_constraint_map.space == sio.space + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + + if not aligned_constraint_map.is_subset(sio): + + linearization_is_valid = False + + print("================ constraint check failure =================") + print("Constraint map not subset of SIO") + print("Dependencies:") + print(insn_id_before+"->"+insn_id_after) + print(prettier_map_string(constraint_map)) + print("Statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(prettier_map_string(aligned_constraint_map.gist(sio))) + print("sio.gist(constraint_map)") + print(prettier_map_string(sio.gist(aligned_constraint_map))) + print("Loop priority known:") + print(preprocessed_knl.loop_priority) + print("===========================================================") + + return linearization_is_valid diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 0000000000000000000000000000000000000000..4d22990579f49f7c62b8bb1b6751fa5fbe4ec914 --- /dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,707 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class DependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +class StatementPairDependencySet(object): + """A set of dependencies between two statements. + + .. attribute:: statement_before + + A :class:`loopy.schedule.checker.schedule.StatementRef` depended + on by statement_after. + + .. attribute:: statement_after + + A :class:`loopy.schedule.checker.schedule.StatementRef` which + cdepends on statement_before. + + .. attribute:: deps + + A :class:`dict` mapping instances of :class:`DependencyType` to + the :mod:`loopy` kernel inames involved in that particular + dependency relationship. + + .. attribute:: dom_before + + A :class:`islpy.BasicSet` representing the domain for the + dependee statement. + + .. attribute:: dom_after + + A :class:`islpy.BasicSet` representing the domain for the + depender statement. 
+ + """ + + def __init__( + self, + statement_before, + statement_after, + deps, # {dep_type: iname_set} + dom_before=None, + dom_after=None, + ): + self.statement_before = statement_before + self.statement_after = statement_after + self.deps = deps + self.dom_before = dom_before + self.dom_after = dom_after + + def __eq__(self, other): + return ( + self.statement_before == other.statement_before + and self.statement_after == other.statement_after + and self.deps == other.deps + and self.dom_before == other.dom_before + and self.dom_after == other.dom_after + ) + + def __lt__(self, other): + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.statement_before) + key_builder.rec(key_hash, self.statement_after) + key_builder.rec(key_hash, self.deps) + key_builder.rec(key_hash, self.dom_before) + key_builder.rec(key_hash, self.dom_after) + + def __str__(self): + result = "%s --before->\n%s iff\n " % ( + self.statement_before, self.statement_after) + return result + " and\n ".join( + ["(%s : %s)" % (dep_type, inames) + for dep_type, inames in self.deps.items()]) + + +def create_elementwise_comparison_conjunction_set( + names0, names1, islvars, op="eq"): + """Create a set constrained by the conjunction of conditions comparing + `names0` to `names1`. + + :arg names0: A list of :class:`str` representing variable names. + + :arg names1: A list of :class:`str` representing variable names. + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + + :arg op: A :class:`str` describing the operator to use when creating + the set constraints. Options: `eq` for `=`, `lt` for `<` + + :returns: A set involving `islvars` cosntrained by the constraints + `{names0[0] names1[0] and names0[1] names1[1] and ...}`. + + """ + + # initialize set with constraint that is always true + conj_set = islvars[0].eq_set(islvars[0]) + for n0, n1 in zip(names0, names1): + if op == "eq": + conj_set = conj_set & islvars[n0].eq_set(islvars[n1]) + elif op == "lt": + conj_set = conj_set & islvars[n0].lt_set(islvars[n1]) + + return conj_set + + +def _convert_constraint_set_to_map(constraint_set, mv_count, src_position=None): + dim_type = isl.dim_type + constraint_map = isl.Map.from_domain(constraint_set) + if src_position: + return constraint_map.move_dims( + dim_type.out, 0, dim_type.in_, src_position, mv_count) + else: + return constraint_map.move_dims( + dim_type.out, 0, dim_type.in_, mv_count, mv_count) + + +def create_dependency_constraint( + statement_dep_set, + loop_priorities, + ): + """Create a statement dependency constraint represented as a map from + each statement instance to statement instances that must occur later, + i.e., ``{[s'=0, i', j'] -> [s=1, i, j] : condition on {i', j', i, j}}`` + indicates that statement ``0`` comes before statment ``1`` when the + specified condition on inames ``i',j',i,j`` is met. ``i'`` and ``j'`` + are the values of inames ``i`` and ``j`` in first statement instance. + + :arg statement_dep_set: A :class:`StatementPairDependencySet` describing + the dependency relationship between the two statements. 
+ + :arg loop_priorities: A list of tuples from the ``loop_priority`` + attribute of :class:`loopy.LoopKernel` specifying the loop nest + ordering rules. + + :returns: An :class:`islpy.Map` mapping each statement instance to all + statement instances that must occur later according to the constraints. + + """ + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + append_apostrophes, + add_dims_to_isl_set, + insert_missing_dims_and_reorder_by_name, + append_marker_to_isl_map_var_names, + list_var_names_in_isl_sets, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the dependency given to create the following constraint: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + dom_inames_ordered_before = list_var_names_in_isl_sets( + [statement_dep_set.dom_before]) + dom_inames_ordered_after = list_var_names_in_isl_sets( + [statement_dep_set.dom_after]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+dom_inames_ordered_before, + other_var_names=[STATEMENT_VAR_NAME]+dom_inames_ordered_after, + marker="'", + ) + statement_var_name_prime = STATEMENT_VAR_NAME+"'" + + # initialize constraints to False + # this will disappear as soon as we add a constraint + all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + + # for each (dep_type, inames) pair, create 'happens before' constraint, + # all_constraints_set will be the union of all these constraints + dt = DependencyType + for dep_type, inames in statement_dep_set.deps.items(): + # need to put inames in a list so that order of inames and inames' + # matches when calling create_elementwise_comparison_conj... + if not isinstance(inames, list): + inames_list = list(inames) + else: + inames_list = inames[:] + inames_prime = append_apostrophes(inames_list) # e.g., [j', k'] + + if dep_type == dt.SAME: + constraint_set = create_elementwise_comparison_conjunction_set( + inames_prime, inames_list, islvars, op="eq") + elif dep_type == dt.PRIOR: + + priority_known = False + # if nesting info is provided: + if loop_priorities: + # assumes all loop_priority tuples are consistent + + # with multiple priority tuples, determine whether the combined + # info they contain can give us a single, full proiritization, + # e.g., if prios={(a, b), (b, c), (c, d, e)}, then we know + # a -> b -> c -> d -> e + + # remove irrelevant inames from priority tuples (because we're + # about to perform a costly operation on remaining tuples) + relevant_priorities = set() + for p_tuple in loop_priorities: + new_tuple = [iname for iname in p_tuple if iname in inames_list] + # empty tuples and single tuples don't help us define + # a nesting, so ignore them (if we're dealing with a single + # iname, priorities will be ignored later anyway) + if len(new_tuple) > 1: + relevant_priorities.add(tuple(new_tuple)) + + # create a mapping from each iname to inames that must be + # nested inside that iname + nested_inside = {} + for outside_iname in inames_list: + nested_inside_inames = set() + for p_tuple in relevant_priorities: + if outside_iname in p_tuple: + nested_inside_inames.update([ + inside_iname for inside_iname in + p_tuple[p_tuple.index(outside_iname)+1:]]) + nested_inside[outside_iname] = nested_inside_inames + + from loopy.schedule.checker.utils import ( + get_orderings_of_length_n) + # get all orderings that are explicitly allowed by priorities + orders = get_orderings_of_length_n( + 
nested_inside, + required_length=len(inames_list), + #return_first_found=True, + return_first_found=False, # slower; allows priorities test below + ) + + if orders: + # test for invalid priorities (includes cycles) + if len(orders) != 1: + raise ValueError( + "create_dependency_constriant encountered invalid " + "priorities %s" + % (loop_priorities)) + priority_known = True + priority_tuple = orders.pop() + + # if only one loop, we know the priority + if not priority_known and len(inames_list) == 1: + priority_tuple = tuple(inames_list) + priority_known = True + + if priority_known: + # PRIOR requires statement_before complete previous iterations + # of loops before statement_after completes current iteration + # according to loop nest order + inames_list_nest_ordered = [ + iname for iname in priority_tuple + if iname in inames_list] + inames_list_nest_ordered_prime = append_apostrophes( + inames_list_nest_ordered) + if set(inames_list_nest_ordered) != set(inames_list): + # TODO could this happen? + assert False + + from loopy.schedule.checker import ( + lexicographic_order_map as lom) + # TODO handle case where inames list is empty + constraint_set = lom.get_lex_order_constraint( + inames_list_nest_ordered_prime, + inames_list_nest_ordered, + islvars, + ) + else: # priority not known + # PRIOR requires upper left quadrant happen before: + constraint_set = create_elementwise_comparison_conjunction_set( + inames_prime, inames_list, islvars, op="lt") + + # get ints representing statements in PairwiseSchedule + s_before_int = 0 + s_after_int = 0 if ( + statement_dep_set.statement_before.insn_id == + statement_dep_set.statement_after.insn_id + ) else 1 + + # set statement_var_name == statement # + constraint_set = constraint_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+s_before_int) + constraint_set = constraint_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+s_after_int) + + # union this constraint_set with all_constraints_set + all_constraints_set = all_constraints_set | constraint_set + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + all_constraints_set, + mv_count=len(dom_inames_ordered_after)+1, # +1 for statement var + src_position=len(dom_inames_ordered_before)+1, # +1 for statement var + ) + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + # (anything other than 0 risks being out of bounds) + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + statement_dep_set.dom_after, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + statement_dep_set.dom_before, isl.dim_type.set, marker="'") + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = insert_missing_dims_and_reorder_by_name( + domain_to_intersect, isl.dim_type.out, + append_apostrophes([STATEMENT_VAR_NAME] + dom_inames_ordered_before)) + range_to_intersect = insert_missing_dims_and_reorder_by_name( + range_to_intersect, + isl.dim_type.out, + [STATEMENT_VAR_NAME] + dom_inames_ordered_after) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +# TODO no longer used, 
move elsewhere +def _create_5pt_stencil_dependency_constraint( + dom_before_constraint_set, + dom_after_constraint_set, + sid_before, + sid_after, + space_iname, + time_iname, + all_dom_inames_ordered=None, # TODO eliminate need for this arg + ): + """ WIP: NO NEED TO REVIEW YET """ + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + append_apostrophes, + add_dims_to_isl_set, + insert_missing_dims_and_reorder_by_name, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the dependency given to create the following constraint: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + from loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + ) + if all_dom_inames_ordered is None: + all_dom_inames_ordered = list_var_names_in_isl_sets( + [dom_before_constraint_set, dom_after_constraint_set]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + other_var_names=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + marker="'", + ) + statement_var_name_prime = STATEMENT_VAR_NAME+"'" + + # initialize constraints to False + # this will disappear as soon as we add a constraint + #all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + + space_iname_prime = space_iname + "'" + time_iname_prime = time_iname + "'" + one = islvars[0] + 1 + two = islvars[0] + 2 + # global: + """ + constraint_set = ( + islvars[time_iname_prime].gt_set(islvars[time_iname]) & + ( + (islvars[space_iname_prime]-two).lt_set(islvars[space_iname]) & + islvars[space_iname].lt_set(islvars[space_iname_prime]+two) + ) + | + islvars[time_iname_prime].gt_set(islvars[time_iname] + one) & + islvars[space_iname].eq_set(islvars[space_iname_prime]) + ) + """ + # local dep: + constraint_set = ( + islvars[time_iname].eq_set(islvars[time_iname_prime] + one) & ( + (islvars[space_iname]-two).lt_set(islvars[space_iname_prime]) & + islvars[space_iname_prime].lt_set(islvars[space_iname]+two)) + | + (islvars[time_iname].eq_set(islvars[time_iname_prime] + two) + & islvars[space_iname_prime].eq_set(islvars[space_iname])) + ) + + # set statement_var_name == statement # + constraint_set = constraint_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+sid_before) + constraint_set = constraint_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+sid_after) + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + constraint_set, len(all_dom_inames_ordered) + 1) # +1 for statement var + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after_constraint_set, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before_constraint_set, isl.dim_type.set, marker="'") + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = insert_missing_dims_and_reorder_by_name( + domain_to_intersect, isl.dim_type.out, + append_apostrophes([STATEMENT_VAR_NAME] + all_dom_inames_ordered)) + range_to_intersect = insert_missing_dims_and_reorder_by_name( + range_to_intersect, + 
isl.dim_type.out, + [STATEMENT_VAR_NAME] + all_dom_inames_ordered) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +def create_arbitrary_dependency_constraint( + constraint_str, + dom_before_constraint_set, + dom_after_constraint_set, + sid_before, + sid_after, + all_dom_inames_ordered=None, # TODO eliminate need for this arg + ): + """ WIP: NO NEED TO REVIEW YET """ + + # TODO test after switching primes to before vars + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + #append_apostrophes, + append_marker_to_strings, + add_dims_to_isl_set, + insert_missing_dims_and_reorder_by_name, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the constraint given to create the following map: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + from loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + ) + if all_dom_inames_ordered is None: + all_dom_inames_ordered = list_var_names_in_isl_sets( + [dom_before_constraint_set, dom_after_constraint_set]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + other_var_names=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + marker="p", + ) # TODO figure out before/after notation + #statement_var_name_prime = STATEMENT_VAR_NAME+"'" + statement_var_name_prime = STATEMENT_VAR_NAME+"p" + # TODO figure out before/after notation + + # initialize constraints to False + # this will disappear as soon as we add a constraint + all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + space = all_constraints_set.space + from pymbolic import parse + from loopy.symbolic import aff_from_expr + + or_constraint_strs = constraint_str.split("or") + + def _quant(s): + return "(" + s + ")" + + def _diff(s0, s1): + return _quant(s0) + "-" + _quant(s1) + + for or_constraint_str in or_constraint_strs: + and_constraint_strs = or_constraint_str.split("and") + #conj_constraint = islvars[0].eq_set(islvars[0]) # init to true + conj_constraint = isl.BasicSet.universe(space) + for cons_str in and_constraint_strs: + if "<=" in cons_str: + lhs, rhs = cons_str.split("<=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(rhs, lhs))))) + # TODO something more robust than this string meddling^ + elif ">=" in cons_str: + lhs, rhs = cons_str.split(">=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs))))) + elif "<" in cons_str: + lhs, rhs = cons_str.split("<") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(rhs, lhs) + "- 1")))) + elif ">" in cons_str: + lhs, rhs = cons_str.split(">") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs) + "- 1")))) + elif "=" in cons_str: + lhs, rhs = cons_str.split("=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.equality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs))))) + else: + 1/0 + all_constraints_set = all_constraints_set | conj_constraint + + # set statement_var_name == statement # + 
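+    # (Illustrative sketch of the parsing above, using hypothetical input:
+    # a clause like "it >= itp + 1" in constraint_str becomes
+    # inequality_from_aff(aff_from_expr(space, (it) - (itp + 1))),
+    # i.e., it - itp - 1 >= 0, and each "or"-separated clause is unioned
+    # into all_constraints_set as one such conjunction. The two eq_set
+    # conjuncts below then pin the statement dims to sid_before/sid_after.)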
all_constraints_set = ( + all_constraints_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+sid_before) + ) + all_constraints_set = ( + all_constraints_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+sid_after) + ) + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + all_constraints_set, len(all_dom_inames_ordered) + 1) # +1 for statement var + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after_constraint_set, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before_constraint_set, isl.dim_type.set, marker="p") + # TODO figure out before/after notation + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = insert_missing_dims_and_reorder_by_name( + domain_to_intersect, isl.dim_type.out, + append_marker_to_strings( # TODO figure out before/after notation + [STATEMENT_VAR_NAME] + all_dom_inames_ordered, "p")) + range_to_intersect = insert_missing_dims_and_reorder_by_name( + range_to_intersect, + isl.dim_type.out, + [STATEMENT_VAR_NAME] + all_dom_inames_ordered) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +def get_dependency_sources_and_sinks(knl, linearization_item_ids): + """Implicitly create a directed graph with the linearization items specified + by ``linearization_item_ids`` as nodes, and with edges representing a + 'happens before' relationship specified by each legacy dependency between + two instructions. Return the sources and sinks within this graph. + + :arg linearization_item_ids: A :class:`set` of :class:`str` representing + loopy instruction ids. + + :returns: Two instances of :class:`set` of :class:`str` instruction ids + representing the sources and sinks in the dependency graph.
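+
+    Example (an illustrative sketch, not from the source): if ``insn_b``
+    and ``insn_c`` each depend on ``insn_a``, and ``linearization_item_ids
+    = {"insn_a", "insn_b", "insn_c"}``, the sources are ``{"insn_a"}`` and
+    the sinks are ``{"insn_b", "insn_c"}``.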
+ + """ + sources = set() + dependees = set() # all dependees (within linearization_item_ids) + for item_id in linearization_item_ids: + # find the deps within linearization_item_ids + deps = knl.id_to_insn[item_id].depends_on & linearization_item_ids + if deps: + # add deps to dependees + dependees.update(deps) + else: # has no deps (within linearization_item_ids), this is a source + sources.add(item_id) + + # sinks don't point to anyone + sinks = linearization_item_ids - dependees + + return sources, sinks + + +def filter_deps_by_intersection_with_SAME( + knl, + deps, + non_conc_inames, + ): + # TODO document + from loopy.schedule.checker.schedule import StatementRef + + dt = DependencyType + + # determine which dep relations have a non-empty intersection with + # the SAME relation + deps_filtered = [] + for insn_id_before, insn_id_after, dep_constraint_map in deps: + + # create isl map representing "SAME" dep for these two insns + shared_nc_inames = ( + knl.id_to_insn[insn_id_before].within_inames & + knl.id_to_insn[insn_id_after].within_inames & + non_conc_inames) + + same_dep_set = StatementPairDependencySet( + StatementRef(insn_id=insn_id_before), + StatementRef(insn_id=insn_id_after), + {dt.SAME: shared_nc_inames}, + knl.get_inames_domain(knl.id_to_insn[insn_id_before].within_inames), + knl.get_inames_domain(knl.id_to_insn[insn_id_after].within_inames), + ) + + same_dep_constraint_map = create_dependency_constraint( + same_dep_set, + knl.loop_priority, # TODO use new must_nest + ) + + # see whether the intersection of dep map and SAME dep map exists + intersect_dep_and_same = same_dep_constraint_map & dep_constraint_map + intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) + + if intersect_not_empty: + deps_filtered.append((insn_id_before, insn_id_after, dep_constraint_map)) + + return deps_filtered diff --git a/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py b/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py new file mode 100644 index 0000000000000000000000000000000000000000..717dde1416ff91922edb5a77ac237382b5e6a4de --- /dev/null +++ b/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py @@ -0,0 +1,368 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +""" WIP: NO NEED TO REVIEW YET """ +import loopy as lp +import numpy as np +from loopy.schedule.checker.utils import ( + create_graph_from_pairs, +) +from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, +) +from loopy import ( + preprocess_kernel, + get_one_linearized_kernel, +) + +# Choose kernel ---------------------------------------------------------- + +knl_choice = "example" +#knl_choice = "unused_inames" +#knl_choice = "matmul" +#knl_choice = "scan" +#knl_choice = "dependent_domain" +#knl_choice = "stroud_bernstein_orig" # TODO invalid sched? +#knl_choice = "ilp_kernel" +#knl_choice = "add_barrier" +#knl_choice = "nop" +#knl_choice = "nest_multi_dom" +#knl_choice = "loop_carried_deps" + +if knl_choice == "example": + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + #knl = lp.tag_inames(knl, {"i": "l.0"}) + #knl = lp.prioritize_loops(knl, "i,k,j") + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") +if knl_choice == "unused_inames": + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + end + end + """, + name="unused_inames", + assumptions="pi,pj,pk >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32}) + #knl = lp.tag_inames(knl, {"i": "l.0"}) + #knl = lp.prioritize_loops(knl, "i,k,j") + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") +elif knl_choice == "matmul": + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + # Must declare coeffs to have "no" shape, to keep loopy + # from trying to figure it out the shape automatically. +elif knl_choice == "ilp_kernel": + knl = lp.make_kernel( + "{[i,j,ilp_iname]: 0 <= i,j < n and 0 <= ilp_iname < 4}", + """ + for i + for j + for ilp_iname + tmp[i,j,ilp_iname] = 3.14 + end + end + end + """, + name="ilp_kernel", + assumptions="n>=1 and n mod 4 = 0", + ) + # TODO why is conditional on ilp_name? 
+ knl = lp.tag_inames(knl, {"j": "l.0", "ilp_iname": "ilp"}) + #knl = lp.prioritize_loops(knl, "i_outer_outer,i_outer_inner,i_inner,a") +if knl_choice == "add_barrier": + np.random.seed(17) + #a = np.random.randn(16) + cnst = np.random.randn(16) + knl = lp.make_kernel( + "{[i, ii]: 0<=i, ii c_end = 2 + for c + ... nop + end + end + """, + "...", + seq_dependencies=True) + knl = lp.fix_parameters(knl, dim=3) +if knl_choice == "nest_multi_dom": + #"{[i,j,k]: 0<=i,j,kacc = 0 {id=insn0} + for j + for k + acc = acc + j + k {id=insn1,dep=insn0} + end + end + end + end + """, + name="nest_multi_dom", + #assumptions="n >= 1", + assumptions="ni,nj,nk,nx >= 1", + lang_version=(2018, 2) + ) + """ + <>foo = 0 {id=insn0} + for i + <>acc = 0 {id=insn1} + for j + for k + acc = acc + j + k {id=insn2,dep=insn1} + end + end + foo = foo + acc {id=insn3,dep=insn2} + end + <>bar = foo {id=insn4,dep=insn3} + """ + knl = lp.prioritize_loops(knl, "x,xx,i") + knl = lp.prioritize_loops(knl, "i,j") + knl = lp.prioritize_loops(knl, "j,k") + +if knl_choice == "loop_carried_deps": + knl = lp.make_kernel( + "{[i]: 0<=iacc0 = 0 {id=insn0} + for i + acc0 = acc0 + i {id=insn1,dep=insn0} + <>acc2 = acc0 + i {id=insn2,dep=insn1} + <>acc3 = acc2 + i {id=insn3,dep=insn2} + <>acc4 = acc0 + i {id=insn4,dep=insn1} + end + """, + name="loop_carried_deps", + assumptions="n >= 1", + lang_version=(2018, 2) + ) + +unprocessed_knl = knl.copy() + +deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + +# get a linearization to check +knl = preprocess_kernel(knl) +knl = get_one_linearized_kernel(knl) +print("kernel schedueld") +linearization_items = knl.linearization +print("checking validity") + +linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items, + ) + +""" +legacy_statement_pair_dep_sets = lp.statement_pair_dep_sets_from_legacy_knl(knl) + +# get a linearization to check +from loopy import get_one_linearized_kernel +linearized_knl = get_one_linearized_kernel(knl) +linearization_items = linearized_knl.linearization + +linearization_is_valid = lp.check_linearization_validity( + knl, legacy_statement_pair_dep_sets, linearization_items) +""" + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + + +print("="*80) +print("testing dep sort") +print("="*80) + +# create dependency graph + +# for which deps does the intersection with the SAME dependency relation exist? 
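+# (Sketch: a legacy dep is kept below iff its relation intersects the SAME
+# relation, i.e., iff it orders statement instances within a shared iteration
+# of their common sequential inames. For hypothetical edges, note that
+# create_graph_from_pairs([("a", "b"), ("a", "c")]) yields {"a": {"b", "c"}};
+# only "before" ids become keys.)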
+# create a graph including these deps as edges (from after->before) + +from loopy.schedule.checker.utils import ( + get_concurrent_inames, +) +_, non_conc_inames = get_concurrent_inames(knl) +legacy_deps_filtered_by_same = filter_deps_by_intersection_with_SAME( + knl, + deps, + non_conc_inames, + ) + +# get dep graph edges +dep_graph_pairs = [ + (insn_id_before, insn_id_after) + for insn_id_before, insn_id_after, _ in legacy_deps_filtered_by_same] + +# create dep graph from edges +dep_graph = create_graph_from_pairs(dep_graph_pairs) + +print("dep_graph:") +for k, v in dep_graph.items(): + print("%s: %s" % (k, v)) diff --git a/loopy/schedule/checker/experimental_scripts/example_wave_equation.py b/loopy/schedule/checker/experimental_scripts/example_wave_equation.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2da94e58c7ba821a9ba05d429ac562d108d774 --- /dev/null +++ b/loopy/schedule/checker/experimental_scripts/example_wave_equation.py @@ -0,0 +1,623 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +""" WIP: NO NEED TO REVIEW YET """ +import loopy as lp +from loopy import generate_code_v2 +from loopy import get_one_linearized_kernel +from loopy import preprocess_kernel +import numpy as np +import islpy as isl +#from loopy.kernel_stat_collector import KernelStatCollector +#from loopy.kernel_stat_collector import KernelStatOptions as kso # noqa +from loopy.schedule.checker.utils import ( + prettier_map_string, + ensure_dim_names_match_and_align, + append_marker_to_isl_map_var_names, + get_concurrent_inames, +) +from loopy.schedule.checker.dependency import ( + create_arbitrary_dependency_constraint, +) +from loopy.schedule.checker.schedule import PairwiseScheduleBuilder +from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, +) + +# Make kernel ---------------------------------------------------------- + +# u[x,t+1] = 2*u[x,t] - u[x,t-1] + c*(dt/dx)**2*(u[x+1,t] - 2*u[x,t] + u[x-1,t]) +# mine, works: +# "{[x,t]: 1<=x {[ix, it]: 1<=ix {[ix, it]: 1<=ix lex time):") + #print(isl_sched_map.space) + #print("-"*80) + +# }}} + +# get map representing lexicographic ordering +sched_lex_order_map = sched.get_lex_order_map_for_sched_space() + +# {{{ verbose + +""" +if verbose: + print("sched lex order map:") + print(prettier_map_string(sched_lex_order_map)) + print("space (lex time -> lex time):") + print(sched_lex_order_map.space) + print("-"*80) +""" + +# }}} + +# create statement instance ordering, +# maps each statement instance to all statement instances occuring later +sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + before_marker="p") + +# {{{ verbose + +if verbose: + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("-"*80) + +if verbose: + print("constraint map space (before aligning):") + print(constraint_map.space) + +# }}} + +# align constraint map spaces to match sio so we can compare them +aligned_constraint_map = ensure_dim_names_match_and_align(constraint_map, sio) + +# {{{ verbose + +if verbose: + print("constraint map space (after aligning):") + print(aligned_constraint_map.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + +# }}} + +assert aligned_constraint_map.space == sio.space +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + +linearization_is_valid = aligned_constraint_map.is_subset(sio) + +if not linearization_is_valid: + + # {{{ verbose + + if verbose: + print("================ constraint check failure =================") + print("constraint map not subset of SIO") + print("dependency:") + print(prettier_map_string(constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(aligned_constraint_map.gist(sio)) + print("sio.gist(constraint_map)") + print(sio.gist(aligned_constraint_map)) + print("loop priority known:") + print(preprocessed_knl.loop_priority) + """ + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + ) + conc_inames, non_conc_inames = get_concurrent_inames(linearized_knl) + print("concurrent inames:", 
conc_inames) + print("sequential inames:", non_conc_inames) + print("constraint map space (stmt instances -> stmt instances):") + print(aligned_constraint_map.space) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("{insn id -> sched sid int} dict:") + print(lp_insn_id_to_lex_sched_id) + """ + print("===========================================================") + + # }}} + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + + +# ====================================================================== +# now do this with complicated mapping + + +# create mapping: +# old (wrong) +""" +m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2 = isl.BasicMap( + "[nx,nt,unused] -> {[statement, ix, it] -> " + "[statement'=statement, tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2_prime = isl.BasicMap( + "[nx,nt,unused] -> {[statement, ix, it] -> " + "[statement'=statement, tx', tt', tparity', itt', itx']: " + "16*(tx' - tt' + tparity') + itx' - itt' = ix - it and " + "16*(tx' + tt') + itt' + itx' = ix + it and " + "0<=tparity'<2 and 0 <= itx' - itt' < 16 and 0 <= itt'+itx' < 16}") +""" + +# new +# TODO remove "unused" +m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2 = isl.BasicMap( + "[nx,nt,unused] -> {[_lp_linchk_statement, ix, it] -> " + "[_lp_linchk_statement'=_lp_linchk_statement, tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +#m2_primes_after = isl.BasicMap( +# "[nx,nt,unused] -> {[statement, ix, it] -> " +# "[statement'=statement, tx', tt', tparity', itt', itx']: " +# "16*(tx' - tt') + itx' - itt' = ix - it and " +# "16*(tx' + tt' + tparity') + itt' + itx' = ix + it and " +# "0<=tparity'<2 and 0 <= itx' - itt' < 16 and 0 <= itt'+itx' < 16}") +m2_prime = isl.BasicMap( + "[nx,nt,unused] -> {[_lp_linchk_statement', ix', it'] -> " + "[_lp_linchk_statement=_lp_linchk_statement', tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix' - it' and " + "16*(tx + tt + tparity) + itt + itx = ix' + it' and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") + +# TODO note order must match statement_iname_premap_order + +print("maping:") +print(prettier_map_string(m2)) + +# new kernel +knl = lp.map_domain(ref_knl, m) +knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") +print("code after mapping:") +print(generate_code_v2(knl).device_code()) +#1/0 + +print("constraint_map before apply_range:") +print(prettier_map_string(constraint_map)) +#mapped_constraint_map = constraint_map.apply_range(m2_prime) +mapped_constraint_map = constraint_map.apply_range(m2) +print("constraint_map after apply_range:") +print(prettier_map_string(mapped_constraint_map)) +#mapped_constraint_map = 
mapped_constraint_map.apply_domain(m2) +mapped_constraint_map = mapped_constraint_map.apply_domain(m2_prime) +# put primes on *before* names +mapped_constraint_map = append_marker_to_isl_map_var_names( + mapped_constraint_map, isl.dim_type.in_, marker="'") + +print("constraint_map after apply_domain:") +print(prettier_map_string(mapped_constraint_map)) + +statement_inames_mapped = set(["itx", "itt", "tt", "tparity", "tx"]) +sid_before = 0 +sid_after = 0 + +preprocessed_knl = preprocess_kernel(knl) +inames_domain_before_mapped = preprocessed_knl.get_inames_domain( + statement_inames_mapped) +inames_domain_after_mapped = preprocessed_knl.get_inames_domain( + statement_inames_mapped) +print("(mapped) inames_domain_before:", inames_domain_before_mapped) +print("(mapped) inames_domain_after:", inames_domain_after_mapped) + +# ============================================= + +verbose = False +verbose = True + +# get a linearization to check +if preprocessed_knl.linearization is None: + linearized_knl = get_one_linearized_kernel(preprocessed_knl) +else: + linearized_knl = preprocessed_knl + +# {{{ verbose + +if verbose: + # Print kernel info ------------------------------------------------------ + print("="*80) + print("Kernel:") + print(linearized_knl) + #print(generate_code_v2(linearized_knl).device_code()) + print("="*80) + print("Iname tags: %s" % (linearized_knl.iname_to_tags)) + print("="*80) + print("Loopy linearization:") + for linearization_item in linearized_knl.linearization: + print(linearization_item) + + print("="*80) + print("inames_domain_before_mapped:", inames_domain_before_mapped) + print("inames_domain_after_mapped:", inames_domain_after_mapped) + +# }}} + + +conc_loop_inames, _ = get_concurrent_inames(linearized_knl) +# Create a mapping of {statement instance: lex point} +# including only instructions involved in this dependency +sched = PairwiseScheduleBuilder( + linearized_knl.linearization, + str(sid_before), + str(sid_after), + loops_to_ignore=conc_loop_inames, + ) + +# Get an isl map representing the PairwiseScheduleBuilder; +# this requires the iname domains + +# get a mapping from lex schedule id to relevant inames domain +sid_to_dom = { + sid_before: inames_domain_before_mapped, + sid_after: inames_domain_after_mapped, + } + +isl_sched_map_before, isl_sched_map_after = sched.build_maps(linearized_knl) + +# {{{ verbose + +if verbose: + print("sid_to_dom:\n", sid_to_dom) + print("PairwiseScheduleBuilder after creating isl map:") + print(sched) + print("LexSched:") + print(prettier_map_string(isl_sched_map_before)) + print(prettier_map_string(isl_sched_map_after)) + #print("space (statement instances -> lex time):") + #print(isl_sched_map.space) + #print("-"*80) + +# }}} + +# get map representing lexicographic ordering +sched_lex_order_map = sched.get_lex_order_map_for_sched_space() + +# {{{ verbose + +""" +if verbose: + print("sched lex order map:") + print(prettier_map_string(sched_lex_order_map)) + print("space (lex time -> lex time):") + print(sched_lex_order_map.space) + print("-"*80) +""" + +# }}} + +# create statement instance ordering, +# maps each statement instance to all statement instances occuring later +sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + before_marker="'") + +# {{{ verbose + +if verbose: + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("-"*80) + +if verbose: + 
print("constraint map space (before aligning):") + print(constraint_map.space) + +# }}} + +# align constraint map spaces to match sio so we can compare them +aligned_constraint_map = ensure_dim_names_match_and_align(constraint_map, sio) + +# {{{ verbose + +if verbose: + print("constraint map space (after aligning):") + print(aligned_constraint_map.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + +# }}} + +assert aligned_constraint_map.space == sio.space +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + +linearization_is_valid = aligned_constraint_map.is_subset(sio) + +if not linearization_is_valid: + + # {{{ verbose + + if verbose: + print("================ constraint check failure =================") + print("constraint map not subset of SIO") + print("dependency:") + print(prettier_map_string(constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(aligned_constraint_map.gist(sio)) + print("sio.gist(constraint_map)") + print(sio.gist(aligned_constraint_map)) + print("loop priority known:") + print(preprocessed_knl.loop_priority) + """ + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + ) + conc_inames, non_conc_inames = get_concurrent_inames(linearized_knl) + print("concurrent inames:", conc_inames) + print("sequential inames:", non_conc_inames) + print("constraint map space (stmt instances -> stmt instances):") + print(aligned_constraint_map.space) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("{insn id -> sched sid int} dict:") + print(lp_insn_id_to_lex_sched_id) + """ + print("===========================================================") + + # }}} + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + +''' +# (U_n^{k+1}-U_n^k)/dt = C*(U_{n+1}^k-U_n^k)/dx +# U_n^{k+1} = U_n^k + dt/dx*C*(U_{n+1}^k-U_n^k) +''' + +# Get stats ---------------------------------------------------------- + +""" +sc = KernelStatCollector( + evaluate_polys=False, + count_madds=False, # TODO enable after madd counting branch is merged + ) +#nx = 2**11 +#nt = 2**11 +nx = 2**5 +nt = 2**5 +param_dict = {"nx": nx, "nt": nt, "c": 1, "dt": 0.1, "dx": 0.1} +stat_list = [kso.WALL_TIME, kso.OP_MAP, kso.FLOP_RATE] +stats = sc.collect_stats(knl, stat_list, param_dict=param_dict) + +# Measured time + flop rate +time_measured = stats[kso.WALL_TIME] +#flop_rate_measured = stats[kso.FLOP_RATE] + +print("time:", time_measured) +""" + +""" +linearization_is_valid = lp.check_linearization_validity(knl, verbose=True) + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) +""" + +""" +linearization_is_valid = lp.check_linearization_validity(knl, verbose=True) + +print("is linearization valid? 
constraint map subset of SIO?") +print(linearization_is_valid) +""" diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py new file mode 100644 index 0000000000000000000000000000000000000000..b547e1d94689394642448de61274b1d52e0dbc89 --- /dev/null +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -0,0 +1,180 @@ +# coding: utf-8 +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def get_statement_ordering_map( + sched_map_before, sched_map_after, lex_map, before_marker="'"): + """Return a mapping that maps each statement instance to + all statement instances occurring later. + + :arg sched_map_before: An :class:`islpy.Map` representing instruction + instance order for the dependee as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg sched_map_after: An :class:`islpy.Map` representing instruction + instance order for the depender as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg lex_map: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. E.g.:: + + {[i0', i1', i2', ...] -> [i0, i1, i2, ...] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2) ...} + + :returns: An :class:`islpy.Map` representing the lex schedule as + a mapping from each statement instance to all statement instances + occurring later. I.e., we compose relations B, L, and A as + B ∘ L ∘ A^-1, where B is sched_map_before, A is sched_map_after, + and L is the lexicographic ordering map. + + """ + + sio = sched_map_before.apply_range( + lex_map).apply_range(sched_map_after.reverse()) + # append marker to in names + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + return append_marker_to_isl_map_var_names( + sio, isl.dim_type.in_, before_marker) + + +def get_lex_order_constraint(before_names, after_names, islvars=None): + """Return a constraint represented as an :class:`islpy.Set` + defining a 'happens before' relationship in a lexicographic + ordering. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before.
(see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + This dictionary defines the space to be used for the set. If no + value is passed, the dictionary will be made using ``before_names`` + and ``after_names``. + + :returns: An :class:`islpy.Set` representing a constraint that enforces a + lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the set:: + + {[i0', i1', i2', i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + # If no islvars passed, make them using the names provided + if islvars is None: + islvars = isl.make_zero_and_vars(before_names+after_names, []) + + # Initialize constraint with i0' < i0 + lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + + # Initialize conjunction constraint with True. + # For each dim d, starting with d=1, this conjunction will have d equalities, + # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)) + equality_constraint_conj = islvars[0].eq_set(islvars[0]) + + for i in range(1, len(before_names)): + + # Add the next equality constraint to equality_constraint_conj + equality_constraint_conj = equality_constraint_conj & \ + islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]]) + + # Create a conjunction constraint by combining a less-than + # constraint for this dim, e.g., (i1' < i1), with the current + # equality constraint conjunction. + # For each dim d, starting with d=1, this conjunction will have d equalities, + # and one inequality, + # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id) + full_conj_constraint = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) & equality_constraint_conj + + # Union this new constraint with the current lex_order_constraint + lex_order_constraint = lex_order_constraint | full_conj_constraint + + return lex_order_constraint + + +def create_lex_order_map( + n_dims, + before_names=None, + after_names=None, + ): + """Return a mapping that maps each point in a lexicographic + ordering to every point that occurs later in lexicographic + time. + + :arg n_dims: An :class:`int` representing the number of dimensions + in the lexicographic ordering. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before. (see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :returns: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. 
+ E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the map:: + + {[i0', i1', i2'] -> [i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + if after_names is None: + after_names = ["i%s" % (i) for i in range(n_dims)] + if before_names is None: + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + before_names = append_marker_to_strings(after_names, marker="'") + + assert len(before_names) == len(after_names) == n_dims + dim_type = isl.dim_type + + lex_order_constraint = get_lex_order_constraint(before_names, after_names) + + lex_map = isl.Map.from_domain(lex_order_constraint) + lex_map = lex_map.move_dims( + dim_type.out, 0, dim_type.in_, + len(before_names), len(after_names)) + + return lex_map diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6e1faf0a4d161c6bd31de68c585bf942dc8642ba..e5a6e3fd084c3affddb49aa3fdb577f08b637a41 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -84,6 +84,9 @@ class StatementRef(object): and self.int_id == other.int_id ) + #def __hash__(self): + # return hash(repr(self)) + def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with :class:`pytools.persistent_dict.PersistentDict`. @@ -395,6 +398,24 @@ class PairwiseScheduleBuilder(object): def get_lex_var_names(self): return [LEX_VAR_PREFIX+str(i) for i in range(self.max_lex_dims())] + def get_lex_order_map_for_sched_space(self): + """Return an :class:`islpy.BasicMap` that maps each point in a + lexicographic ordering to every point that is + lexicographically greater. + """ + + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + n_dims = self.max_lex_dims() + return create_lex_order_map( + n_dims, after_names=self.get_lex_var_names()) + + def __eq__(self, other): + return ( + self.stmt_instance_before == other.stmt_instance_before + and self.stmt_instance_after == other.stmt_instance_after) + def __str__(self): def stringify_sched_stmt_instance(stmt_inst): diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index e862d166e17e8346d8fb87d02443ce4721280878..5c51b61b6418224176db447e727c901ac6082d63 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -132,6 +132,72 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): return aligned_obj_map +def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): + """Return an isl_map with marker appended to + dim_type dimension names. + + :arg old_isl_map: A :class:`islpy.Map`. + + :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, + specifying the dimension to be marked. + + :returns: A :class:`islpy.Map` matching `old_isl_map` with + ``marker`` appended to dim_type dimension names. + + """ + + new_map = old_isl_map.copy() + for i in range(len(old_isl_map.get_var_names(dim_type))): + new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name( + dim_type, i)+marker) + return new_map + + +def make_islvars_with_marker( + var_names_needing_marker, other_var_names, param_names=[], marker="'"): + """Return a dictionary from variable and parameter names + to :class:`islpy.PwAff` instances that represent each of + the variables and parameters, appending marker to + var_names_needing_marker.
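+
+    Example (an illustrative sketch):
+    ``make_islvars_with_marker(["s"], ["i"], marker="'")`` would return
+    islvars with keys ``s'``, ``i``, and ``0`` (a zero
+    :class:`islpy.PwAff` constant).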
+ + :arg var_names_needing_marker: A :class:`list` of :class:`str` + elements representing variable names to have markers appended. + + :arg other_var_names: A :class:`list` of :class:`str` + elements representing variable names to be included as-is. + + :arg param_names: A :class:`list` of :class:`str` elements + representing parameter names. + + :returns: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + + """ + + def append_marker(items, mark): + new_items = [] + for item in items: + new_items.append(item+mark) + return new_items + + return isl.make_zero_and_vars( + append_marker(var_names_needing_marker, marker) + + other_var_names, param_names) + + +def append_marker_to_strings(strings, marker="'"): + if not isinstance(strings, list): + raise ValueError("append_marker_to_strings did not receive a list") + else: + return [s+marker for s in strings] + + +def append_apostrophes(strings): + return append_marker_to_strings(strings, marker="'") + + def _get_union(list_items): union = list_items[0] for s in list_items[1:]: @@ -176,6 +242,7 @@ def create_symbolic_map_from_tuples( on these values. """ + # TODO clarify this with more comments # TODO allow None for domains dim_type = isl.dim_type @@ -302,6 +369,166 @@ def get_insn_id_from_linearization_item(linearization_item): return linearization_item.insn_id +# TODO for better performance, could combine these funcs so we don't +# loop over linearization more than once +def get_all_nonconcurrent_insn_iname_subsets( + knl, exclude_empty=False, non_conc_inames=None): + """Return a :class:`set` of every unique subset of non-concurrent + inames used in an instruction in a :class:`loopy.LoopKernel`. + + :arg knl: A :class:`loopy.LoopKernel`. + + :arg exclude_empty: A :class:`bool` specifying whether to + exclude the empty set. + + :arg non_conc_inames: A :class:`set` of non-concurrent inames + which may be provided if already known. + + :returns: A :class:`set` of every unique subset of non-concurrent + inames used in any instruction in a :class:`loopy.LoopKernel`. 
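+
+    Example (an illustrative sketch): for instructions nested within
+    inames ``{i, j}`` and ``{i, k}``, where ``j`` is concurrent and ``i``
+    and ``k`` are not, this returns
+    ``{frozenset({"i"}), frozenset({"i", "k"})}``.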
+ + """ + + if non_conc_inames is None: + _, non_conc_inames = get_concurrent_inames(knl) + + iname_subsets = set() + for insn in knl.instructions: + iname_subsets.add(insn.within_inames & non_conc_inames) + + if exclude_empty: + iname_subsets.discard(frozenset()) + + return iname_subsets + + +def get_linearization_item_ids_within_inames(knl, inames): + linearization_item_ids = set() + for insn in knl.instructions: + if inames.issubset(insn.within_inames): + linearization_item_ids.add(insn.id) + return linearization_item_ids + + +# TODO use yield to clean this up +# TODO use topological sort from loopy, then find longest path in dag +def _generate_orderings_starting_w_prefix( + allowed_after_dict, orderings, required_length=None, + start_prefix=(), return_first_found=False): + # alowed_after_dict = {str: set(str)} + # start prefix = tuple(str) + # orderings = set + if start_prefix: + next_items = allowed_after_dict[start_prefix[-1]]-set(start_prefix) + else: + next_items = allowed_after_dict.keys() + + if required_length: + if len(start_prefix) == required_length: + orderings.add(start_prefix) + if return_first_found: + return + else: + orderings.add(start_prefix) + if return_first_found: + return + + # return if no more items left + if not next_items: + return + + for next_item in next_items: + new_prefix = start_prefix + (next_item,) + _generate_orderings_starting_w_prefix( + allowed_after_dict, + orderings, + required_length=required_length, + start_prefix=new_prefix, + return_first_found=return_first_found, + ) + if return_first_found and orderings: + return + return + + +def get_orderings_of_length_n( + allowed_after_dict, required_length, return_first_found=False): + """Return all orderings found in tree represented by `allowed_after_dict`. + + :arg allowed_after_dict: A :class:`dict` mapping each :class:`string` + names to a :class:`set` of names that are allowed to come after + that name. + + :arg required_length: A :class:`int` representing the length required + for all orderings. Orderings not matching the required length will + not be returned. + + :arg return_first_found: A :class:`bool` specifying whether to return + the first valid ordering found. + + :returns: A :class:`set` of all orderings that are *explicitly* allowed + by the tree represented by `allowed_after_dict`. I.e., if we know + a->b and c->b, we don't know enough to return a->c->b. Note that + if the set for a dict key is empty, nothing is allowed to come after. + + """ + + orderings = set() + _generate_orderings_starting_w_prefix( + allowed_after_dict, + orderings, + required_length=required_length, + start_prefix=(), + return_first_found=return_first_found, + ) + return orderings + + +def create_graph_from_pairs(before_after_pairs): + # create key for every before + graph = dict([(before, set()) for before, _ in before_after_pairs]) + for before, after in before_after_pairs: + graph[before] = graph[before] | set([after, ]) + return graph + + +# only used for example purposes: + + +def create_explicit_map_from_tuples(tuple_pairs, space): + """Return a :class:`islpy.Map` in :class:`islpy.Space` space + mapping tup_in->tup_out for each `(tup_in, tup_out)` pair + in `tuple_pairs`, where `tup_in` and `tup_out` are + tuples of :class:`int` values to be assigned to the + corresponding dimension variables in `space`. 
+ + """ + + dim_type = isl.dim_type + individual_maps = [] + + for tup_in, tup_out in tuple_pairs: + constraints = [] + for i, val_in in enumerate(tup_in): + constraints.append( + isl.Constraint.equality_alloc(space) + .set_coefficient_val(dim_type.in_, i, 1) + .set_constant_val(-1*val_in)) + for i, val_out in enumerate(tup_out): + constraints.append( + isl.Constraint.equality_alloc(space) + .set_coefficient_val(dim_type.out, i, 1) + .set_constant_val(-1*val_out)) + individual_maps.append( + isl.Map.universe(space).add_constraints(constraints)) + + union_map = individual_maps[0] + for m in individual_maps[1:]: + union_map = union_map.union(m) + + return union_map + + def get_EnterLoop_inames(linearization_items, knl): from loopy.schedule import EnterLoop loop_inames = set() diff --git a/loopy/tools.py b/loopy/tools.py index a1cd5e108a45ba60c71b3bb7a51f779b84172065..594496bf624fc1c8e444fac72012c55b8ecbe914 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -355,6 +355,17 @@ def empty_aligned(shape, dtype, order='C', n=64): # }}} +# {{{ get graph sources + +def get_graph_sources(graph): + sources = set(graph.keys()) + for non_sources in graph.values(): + sources -= non_sources + return sources + +# }}} + + # {{{ pickled container value class _PickledObject(object): diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index a20a798cfa35c64c0cbd7097b41824dda2a35a84..723ff4f36092be9b0b31593b99d6656a0ece26ef 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -76,6 +76,7 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) + # TODO update with dependencies v2 new_knl = add_dependency(kernel=new_knl, insn_match=insn_after, depends_on="id:"+id) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8432d59ec5b162f6e963abbeae3b2fcabe94cf27..0d18fdc8eac1af777e78871149e55fbc4eeb387d 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -33,6 +33,7 @@ from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from pytools import Record __doc__ = """ @@ -79,16 +80,12 @@ __doc__ = """ def set_loop_priority(kernel, loop_priority): from warnings import warn - warn("set_loop_priority is deprecated. Use prioritize_loops instead. " - "Attention: A call to set_loop_priority will overwrite any previously " - "set priorities!", DeprecationWarning, stacklevel=2) - - if isinstance(loop_priority, str): - loop_priority = tuple(s.strip() - for s in loop_priority.split(",") if s.strip()) - loop_priority = tuple(loop_priority) - - return kernel.copy(loop_priority=frozenset([loop_priority])) + warn("set_loop_priority is deprecated. Use constrain_loop_nesting instead. " + "Calling constrain_loop_nesting(kernel, must_nest=loop_priority). " + "Scheduler will now consider provided loop priority a required " + "(must_nest) constraint.", + DeprecationWarning, stacklevel=2) + return constrain_loop_nesting(kernel, must_nest=loop_priority) def prioritize_loops(kernel, loop_priority): @@ -102,15 +99,769 @@ def prioritize_loops(kernel, loop_priority): all calls to prioritize_loops together establish a partial order on the inames (see https://en.wikipedia.org/wiki/Partially_ordered_set). 
- :arg: an iterable of inames, or, for brevity, a comma-separated string of - inames + :arg loop_priority: an iterable of inames, or, for brevity, a + comma-separated string of inames """ - if isinstance(loop_priority, str): - loop_priority = tuple(s.strip() - for s in loop_priority.split(",") if s.strip()) - loop_priority = tuple(loop_priority) - return kernel.copy(loop_priority=kernel.loop_priority.union([loop_priority])) + from warnings import warn + warn("prioritize_loops is deprecated. Use constrain_loop_nesting instead. " + "Calling constrain_loop_nesting(kernel, must_nest=loop_priority). " + "Scheduler will now consider provided loop priority a required " + "(must_nest) constraint.", + DeprecationWarning, stacklevel=2) + return constrain_loop_nesting(kernel, must_nest=loop_priority) + + +class UnexpandedInameSet(Record): + def __init__(self, inames, complement=False): + Record.__init__( + self, + inames=inames, + complement=complement, + ) + + def contains(self, iname): + return (iname not in self.inames if self.complement + else iname in self.inames) + + def contains_all(self, iname_set): + return (not (iname_set & self.inames) if self.complement + else iname_set.issubset(self.inames)) + + def get_inames_represented(self, iname_universe=None): + """Return the set of inames represented by the UnexpandedInameSet + """ + if self.complement: + if not iname_universe: + raise ValueError( + "Cannot expand UnexpandedInameSet %s without " + "iname_universe." % (self)) + return iname_universe-self.inames + else: + return self.inames.copy() + + def __lt__(self, other): + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.inames) + key_builder.rec(key_hash, self.complement) + + def __str__(self): + return "%s{%s}" % ("~" if self.complement else "", + ",".join(i for i in sorted(self.inames))) + + +class LoopNestConstraints(Record): + def __init__(self, must_nest=None, must_not_nest=None, + must_nest_graph=None): + Record.__init__( + self, + must_nest=must_nest, + must_not_nest=must_not_nest, + must_nest_graph=must_nest_graph, + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.must_nest) + key_builder.rec(key_hash, self.must_not_nest) + key_builder.rec(key_hash, self.must_nest_graph) + + def __str__(self): + return "LoopNestConstraints(\n" \ + " must_nest = " + str(self.must_nest) + "\n" \ + " must_not_nest = " + str(self.must_not_nest) + "\n" \ + " must_nest_graph = " + str(self.must_nest_graph) + "\n" \ + ")" + + +def process_loop_nest_specification( + nesting, + max_tuple_size=None, + complement_sets_allowed=True, + ): + # make sure user-supplied nesting conforms to rules + # convert string representations of nestings to tuples of UnexpandedInameSets + + import re + + def raise_loop_nest_input_error(msg): + valid_prio_rules = ( + 'Valid `must_nest` description formats: ' + '"iname, iname, ..." or (str, str, str, ...), ' + 'where str can be of form ' + '"iname" or "{iname, iname, ...}". 
No set complements allowed.\n' + 'Valid `must_not_nest` description tuples must have len <= 2: ' + '"iname, iname", "iname, ~iname", or ' + '(str, str), where str can be of form ' + '"iname", "~iname", "{iname, iname, ...}", or "~{iname, iname, ...}".' + ) + raise ValueError( + "Invalid loop nest prioritization: %s\n" + "Loop nest prioritization formatting rules:\n%s" + % (msg, valid_prio_rules)) + + def _error_on_regex_match(match_str, target_str): + if re.findall(match_str, target_str): + raise_loop_nest_input_error( + "Unrecognized character(s) %s in nest string %s" + % (re.findall(match_str, target_str), target_str)) + + def _process_iname_set_str(iname_set_str): + # convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + + # remove leading/trailing whitespace + iname_set_str_stripped = iname_set_str.strip() + + if iname_set_str_stripped[0] == "~": + # Make sure complement is allowed + if not complement_sets_allowed: + raise_loop_nest_input_error( + "Complement (~) not allowed in this loop nest string %s. " + "If you have a use-case where allowing a currently " + "disallowed set complement would be helpful, and the " + "desired nesting constraint cannot easily be expressed " + "another way, " + "please contact the Loo.py maintainers." + % (iname_set_str)) + + # Make sure that braces are included if multiple inames present + if "," in iname_set_str and not ( + iname_set_str.startswith("~{") and + iname_set_str.endswith("}")): + raise_loop_nest_input_error( + "Complements of sets containing multiple inames must " + "enclose inames in braces: %s is not valid." + % (iname_set_str)) + + complement = True + else: + complement = False + + # remove leading/trailing tilde, braces, and space + iname_set_str_stripped = iname_set_str_stripped.strip("~{} ") + + # should be no remaining special characters besides comma and space + _error_on_regex_match(r'([^,\w ])', iname_set_str_stripped) + + # split by commas or spaces to get inames + inames = re.findall(r'([\w]+)(?:[ |,]*|$)', iname_set_str_stripped) + + # make sure iname count matches what we expect from comma count + if len(inames) != iname_set_str_stripped.count(",") + 1: + raise_loop_nest_input_error( + "Found %d inames but expected %d in string %s." + % (len(inames), iname_set_str_stripped.count(",") + 1, + iname_set_str_stripped)) + + return UnexpandedInameSet( + set([s.strip() for s in iname_set_str_stripped.split(",")]), + complement=complement) + + if isinstance(nesting, str): + # Enforce that priorities involving iname sets be passed as tuple + # Iname sets defined negatively with a single iname are allowed here + + # check for any special characters besides comma, space, and tilde + _error_on_regex_match(r'([^,\w~ ])', nesting) + + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting.split(",")) + else: + # nesting not passed as string + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting) + + # check max_tuple_size + if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: + raise_loop_nest_input_error( + "Loop nest prioritization tuple %s exceeds max tuple size %d." + % (nesting_as_tuple, max_tuple_size)) + + # make sure nesting has len > 1 + if len(nesting_as_tuple) <= 1: + raise_loop_nest_input_error( + "Loop nest prioritization tuple %s must have length > 1." + % (nesting_as_tuple)) + + return nesting_as_tuple + + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, # (UnexpandedInameSet, Unex..., ...)
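+        # e.g., for hypothetical inames, the must_nest tuple
+        # (UnexpandedInameSet({"i"}), UnexpandedInameSet({"j", "k"}))
+        # expands below into the pairs {("i", "j"), ("i", "k")}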
+ all_inames, + ): + + # First convert negatively defined iname sets to sets + positively_defined_iname_sets = [] + for iname_set in iname_sets_tuple: + positively_defined_iname_sets.append( + iname_set.get_inames_represented(all_inames)) + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. " + % (iname_sets_tuple, prio_tuple)) + return loop_priority_pairs + + +def check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints, must_nest_graph): + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + + if must_not_nest_constraints and must_nest_graph: + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph.items(): + must_pairs.extend( + list(itertools.product([iname_before], inames_after))) + if any(not check_must_not_nest(must_pairs, must_not_nest_tuple) + for must_not_nest_tuple in must_not_nest_constraints): + raise ValueError( + "Nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest relationships (must_nest graph: %s)." + % (must_not_nest_constraints, must_nest_graph)) + + +def constrain_loop_nesting( + kernel, must_nest=None, must_not_nest=None): + # TODO docstring + # TODO what if someone passes single-iname prio? + # TODO enforce that must_nest be a single tuple not list of tuples + # (or update implementation to allow list of tuples) + + # check for existing constraints + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) + expand_must_priorities = set() + + if must_nest: + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + + # don't prioritize concurrent inames: + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if isinstance(kernel.iname_to_tag.get(iname, None), + ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." + % (iname, must_nest_tuple)) + + # Update must_nest graph + from pytools.graph import CycleError + try: + must_nest_graph_new = update_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + except CycleError: + raise ValueError( + "constrain_loop_nesting: Loop priority cycle detected. 
" + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest_tuple, must_nest_constraints_old)) + + # Check for inconsistent must_nest constraints by checking for cycle: + from pytools.graph import contains_cycle + if contains_cycle(must_nest_graph_new): + # TODO will this ever happen or does check above cover this? + raise ValueError( + "constrain_loop_nesting: Loop priority cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest_tuple, must_nest_constraints_old)) + + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + + # check for conflicts with inames tagged 'vec' + from loopy.kernel.data import VectorizeTag + for iname, new_tag in six.iteritems(kernel.iname_to_tag): + if isinstance(new_tag, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # iname is not a leaf + raise ValueError( + "Iname %s tagged as 'vec', but loop priorities " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + + # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) + expand_must_priorities = _expand_iname_sets_in_tuple( + must_nest_tuple, kernel.all_inames()) + + # Prepare to update value for must_nest constraints + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + else: + # no new must_nest constraints + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + + if must_not_nest: + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + + # cycles are allowed in must_not_nest constraints, + # only need to check if incompatible with must_nest_constraints + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + must_pairs.extend(list(itertools.product([iname_before], inames_after))) + + if not check_must_not_nest(must_pairs, must_not_nest_tuple): + raise ValueError( + "constrain_loop_nesting: nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest constraints %s." 
+ % (must_not_nest_tuple, must_nest_constraints_new)) + + # prepare to update value for must_not_nest constraints + must_not_nest_constraints_new = must_not_nest_constraints_old | set([ + must_not_nest_tuple, ]) + else: + # no new must_not_nest constraints + must_not_nest_constraints_new = must_not_nest_constraints_old + + nest_constraints = LoopNestConstraints( + must_nest=must_nest_constraints_new, + must_not_nest=must_not_nest_constraints_new, + must_nest_graph=must_nest_graph_new, + ) + + return kernel.copy( + loop_priority=kernel.loop_priority.union(expand_must_priorities), + loop_nest_constraints=nest_constraints, + ) + + +def check_must_nest(all_loop_nests, must_nest, all_inames): + # in order to make sure must_nest is satisfied, we + # need to expand all must_nest tiers + + # TODO instead of expanding tiers into all pairs up front, + # create these pairs one at a time so that we can stop as soon as we fail + + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + # must_nest_expanded contains pairs + for before, after in must_nest_expanded: + found = False + for nesting in all_loop_nests: + if before in nesting and after in nesting and ( + nesting.index(before) < nesting.index(after)): + found = True + break + if not found: + return False + return True + + +def check_must_not_nest(all_loop_nests, must_not_nest): + # recall that must_not_nest may only contain two tiers + + for nesting in all_loop_nests: + # Go thru each pair in all_loop_nests + for i, iname_before in enumerate(nesting): + for iname_after in nesting[i+1:]: + # Check whether it violates must not nest + if (must_not_nest[0].contains(iname_before) + and must_not_nest[1].contains(iname_after)): + # Stop as soon as we fail + return False + return True + + +def check_all_must_not_nests(all_loop_nests, must_not_nests): + # recall that must_not_nest may only contain two tiers + for must_not_nest in must_not_nests: + if not check_must_not_nest(all_loop_nests, must_not_nest): + return False + return True + + +def is_loop_nesting_valid( + all_loop_nests, + must_nest_constraints, + must_not_nest_constraints, + all_inames): + + # check must-nest constraints + must_nest_valid = True + if must_nest_constraints: + for must_nest in must_nest_constraints: + if not check_must_nest( + all_loop_nests, must_nest, all_inames): + must_nest_valid = False + break + + # check must-not-nest constraints + must_not_nest_valid = True + if must_not_nest_constraints is not None: + for must_not_nest in must_not_nest_constraints: + if not check_must_not_nest( + all_loop_nests, must_not_nest): + must_not_nest_valid = False + break + + return must_nest_valid and must_not_nest_valid + + +def update_must_nest_graph(must_nest_graph, must_nest, all_inames): + from copy import deepcopy + new_graph = deepcopy(must_nest_graph) + + # first, all inames must be a node in the graph: + for iname in all_inames: + if iname not in new_graph.keys(): + new_graph[iname] = set() + + # get (before, after) pairs: + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + + # update graph: + for before, after in must_nest_expanded: + new_graph[before].add(after) + + # compute transitive closure: + from pytools.graph import compute_transitive_closure + # TODO compute_transitive_closure now allows cycles; check for cycle separately + return compute_transitive_closure(new_graph) + + +def get_iname_nestings(outline): + from loopy.schedule import EnterLoop, LeaveLoop + # return a list of tuples representing deepest nestings + nestings = [] + 
current_tiers = []
+    already_exiting_loops = False
+    for outline_item in outline:
+        if isinstance(outline_item, EnterLoop):
+            already_exiting_loops = False
+            current_tiers.append(outline_item.iname)
+        elif isinstance(outline_item, LeaveLoop):
+            if not already_exiting_loops:
+                nestings.append(tuple(current_tiers))
+                already_exiting_loops = True
+            del current_tiers[-1]
+    return nestings
+
+
+def replace_inames_in_nest_constraints(
+        inames_to_replace, replacement_inames, old_constraints,
+        coalesce_duplicate_replacement_inames=False):
+    """
+    :arg inames_to_replace: A set of inames that may exist in
+        `old_constraints`, each of which is to be replaced with all inames
+        in `replacement_inames`.
+
+    :arg replacement_inames: A set of inames, all of which will replace each
+        iname in `inames_to_replace` in `old_constraints`.
+
+    :arg old_constraints: An iterable of tuples containing one or more
+        :class:`UnexpandedInameSet` objects.
+    """
+
+    # replace each iname in inames_to_replace
+    # with *all* inames in replacement_inames
+
+    # loop through old_constraints and handle each nesting independently
+    new_constraints = set()
+    for old_nesting in old_constraints:
+        # loop through each iname_set in this nesting and perform replacement
+        new_nesting = []
+        for iname_set in old_nesting:
+
+            # find inames to be replaced
+            inames_found = inames_to_replace & iname_set.inames
+
+            # create the new set of inames with the replacements
+            if inames_found:
+                new_inames = iname_set.inames - inames_found
+                new_inames.update(replacement_inames)
+            else:
+                new_inames = iname_set.inames.copy()
+
+            new_nesting.append(
+                UnexpandedInameSet(new_inames, iname_set.complement))
+
+        # if we've removed things, new_nesting might only contain 1 item,
+        # in which case it's meaningless and we should just remove it
+        if len(new_nesting) > 1:
+            new_constraints.add(tuple(new_nesting))
+
+    # When joining inames, we may need to coalesce:
+    # e.g., if we join `i` and `j` into `ij`, and old_nesting was
+    # [{i, k}, {j, h}], at this point we have [{ij, k}, {ij, h}],
+    # which contains a cycle. If coalescing is enabled, change this
+    # to [{k}, {ij}, {h}] to remove the cycle.
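+    # For illustration (hypothetical values): replacing {"i", "j"} with
+    # {"ij"} in the single must-nest constraint ({i, k}, {j, h}) with
+    # coalescing enabled should proceed as
+    #   ({i, k}, {j, h}) -> ({ij, k}, {ij, h}) -> ({k}, {ij}, {h}),
+    # pulling the duplicated replacement iname out into its own tier.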
+ if coalesce_duplicate_replacement_inames: + + def coalesce_duplicate_inames_in_nesting(nesting, iname_candidates): + # TODO would like this to be generic, but for now, assumes all + # UnexpandedInameSets have complement=False, which works if + # we're only using this for must_nest constraints since they + # cannot have complements + for iname_set in nesting: + assert not iname_set.complement + + import copy + # copy and convert nesting to list so we can modify + coalesced_nesting = list(copy.deepcopy(nesting)) + + # repeat coalescing step until we don't find any adjacent pairs + # containing duplicates (among iname_candidates) + found_duplicates = True + while found_duplicates: + found_duplicates = False + # loop through each iname_set in nesting and coalesce + # (assume new_nesting has at least 2 items) + i = 0 + while i < len(coalesced_nesting)-1: + iname_set_before = coalesced_nesting[i] + iname_set_after = coalesced_nesting[i+1] + # coalesce for each iname candidate + for iname in iname_candidates: + if (iname_set_before.inames == set([iname, ]) and + iname_set_after.inames == set([iname, ])): + # before/after contain single iname to be coalesced, + # -> remove iname_set_after + del coalesced_nesting[i+1] + found_duplicates = True + elif (iname_set_before.inames == set([iname, ]) and + iname in iname_set_after.inames): + # before contains single iname to be coalesced, + # after contains iname along with others, + # -> remove iname from iname_set_after.inames + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname_set_after.inames == set([iname, ])): + # after contains single iname to be coalesced, + # before contains iname along with others, + # -> remove iname from iname_set_before.inames + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + complement=iname_set_before.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname in iname_set_after.inames): + # before and after contain iname along with others, + # -> remove iname from iname_set_{before,after}.inames + # and insert it in between them + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + complement=iname_set_before.complement, + ) + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + coalesced_nesting.insert(i+1, UnexpandedInameSet( + inames=set([iname, ]), + complement=False, + )) + found_duplicates = True + # else, iname was not found in both sets, so do nothing + i = i + 1 + + return tuple(coalesced_nesting) + + # loop through new_constraints; handle each nesting independently + coalesced_constraints = set() + for new_nesting in new_constraints: + coalesced_constraints.add( + coalesce_duplicate_inames_in_nesting( + new_nesting, replacement_inames)) + + return coalesced_constraints + else: + return new_constraints + + +def replace_inames_in_graph( + inames_to_replace, replacement_inames, old_graph): + # replace each iname in inames_to_replace with all inames in replacement_inames + + new_graph = {} + iname_to_replace_found_as_key = False + union_of_inames_after_for_replaced_keys = set() + for iname, inames_after in old_graph.items(): + # create new inames_after + new_inames_after = inames_after.copy() + inames_found = inames_to_replace & 
new_inames_after
+
+        if inames_found:
+            new_inames_after -= inames_found
+            new_inames_after.update(replacement_inames)
+
+        # update dict
+        if iname in inames_to_replace:
+            iname_to_replace_found_as_key = True
+            union_of_inames_after_for_replaced_keys = \
+                union_of_inames_after_for_replaced_keys | new_inames_after
+            # don't add this iname as a key in new graph
+        else:
+            new_graph[iname] = new_inames_after
+
+    # add replacement iname keys
+    if iname_to_replace_found_as_key:
+        for new_key in replacement_inames:
+            new_graph[new_key] = union_of_inames_after_for_replaced_keys.copy()
+
+    # check for cycle
+    from pytools.graph import contains_cycle
+    if contains_cycle(new_graph):
+        raise ValueError(
+            "replace_inames_in_graph: Loop priority cycle detected. "
+            "Cannot replace inames %s with inames %s."
+            % (inames_to_replace, replacement_inames))
+
+    return new_graph
+
+
+def replace_inames_in_all_nest_constraints(
+        knl, old_inames, new_inames,
+        coalesce_duplicate_replacement_inames=False,
+        pairs_that_must_not_violate_constraints=set(),
+        ):
+
+    # get old must_nest and must_not_nest
+    # (must_nest_graph will be rebuilt)
+    if knl.loop_nest_constraints:
+        old_must_nest = knl.loop_nest_constraints.must_nest
+        old_must_not_nest = knl.loop_nest_constraints.must_not_nest
+        # (these could still be None)
+    else:
+        old_must_nest = None
+        old_must_not_nest = None
+
+    if old_must_nest:
+        # check to make sure special pairs don't conflict with constraints
+        for iname_before, iname_after in pairs_that_must_not_violate_constraints:
+            if iname_before in knl.loop_nest_constraints.must_nest_graph[
+                    iname_after]:
+                raise ValueError(
+                    "Implied nestings violate existing must-nest constraints."
+                    "\nimplied nestings: %s\nmust-nest constraints: %s"
+                    % (pairs_that_must_not_violate_constraints, old_must_nest))
+
+        new_must_nest = replace_inames_in_nest_constraints(
+            old_inames, new_inames, old_must_nest,
+            coalesce_duplicate_replacement_inames,
+            )
+    else:
+        new_must_nest = None
+
+    if old_must_not_nest:
+        # check to make sure special pairs don't conflict with constraints
+        if not check_all_must_not_nests(
+                pairs_that_must_not_violate_constraints, old_must_not_nest):
+            raise ValueError(
+                "Implied nestings violate existing must-not-nest constraints."
+                "\nimplied nestings: %s\nmust-not-nest constraints: %s"
+                % (pairs_that_must_not_violate_constraints, old_must_not_nest))
+
+        new_must_not_nest = replace_inames_in_nest_constraints(
+            old_inames, new_inames, old_must_not_nest)
+        # each must-not-nest constraint may only contain two tiers
+        # TODO coalesce_duplicate_replacement_inames?
+    else:
+        new_must_not_nest = None
+
+    # Rebuild must_nest graph
+    if new_must_nest:
+        new_must_nest_graph = {}
+        new_all_inames = (
+            knl.all_inames() - set(old_inames)) | set(new_inames)
+        from pytools.graph import CycleError
+        for must_nest_tuple in new_must_nest:
+            try:
+                new_must_nest_graph = update_must_nest_graph(
+                    new_must_nest_graph, must_nest_tuple, new_all_inames)
+            except CycleError:
+                raise ValueError(
+                    "Loop priority cycle detected when replacing inames %s "
+                    "with inames %s. Previous must_nest constraints: %s"
+                    % (old_inames, new_inames, old_must_nest))
+
+        # check for cycle
+        from pytools.graph import contains_cycle
+        if contains_cycle(new_must_nest_graph):
+            # TODO will this ever happen or does check above cover this?
+            raise ValueError(
+                "Loop priority cycle detected when replacing inames %s "
+                "with inames %s. "
Previous must_nest constraints: %s" + % (old_inames, new_inames, old_must_nest)) + + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + check_must_not_nest_against_must_nest_graph( + new_must_not_nest, new_must_nest_graph) + else: + new_must_nest_graph = None + + return knl.copy( + loop_nest_constraints=LoopNestConstraints( + must_nest=new_must_nest, + must_not_nest=new_must_not_nest, + must_nest_graph=new_must_nest_graph, + ) + ) # }}} @@ -295,12 +1046,17 @@ def _split_iname_backend(kernel, split_iname, new_prio = new_prio + (prio_iname,) new_priorities.append(new_prio) + # update must_nest, must_not_nest, and must_nest_graph + kernel = replace_inames_in_all_nest_constraints( + kernel, set([split_iname, ]), [inner_iname, outer_iname]) + kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, instructions=new_insns, applied_iname_rewrites=applied_iname_rewrites, - loop_priority=frozenset(new_priorities)) + loop_priority=frozenset(new_priorities), + ) rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) @@ -584,11 +1340,53 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): within_inames=subst_within_inames(insn.within_inames)) for insn in kernel.instructions] + # update must_nest, must_not_nest, and must_nest_graph + # (will fail if cycle is created in must-nest graph) + implied_nestings = set() + inames_orig_order = inames[::-1] # this was reversed + for i, iname_before in enumerate(inames_orig_order[:-1]): + for iname_after in inames_orig_order[i+1:]: + implied_nestings.add((iname_before, iname_after)) + kernel = replace_inames_in_all_nest_constraints( + kernel, set(inames), [new_iname], + coalesce_duplicate_replacement_inames=True, + pairs_that_must_not_voilate_constraints=implied_nestings, + ) + + # update legacy loop_priority + # TODO handle coalescing correctly here (until we remove old prios) + old_loop_priority = kernel.loop_priority + new_loop_priority = None + if old_loop_priority is not None: + new_loop_priority = set() + for old_tup in old_loop_priority: + new_tup = [] + for iname in old_tup: + if iname in inames: + # need to replace iname with new_iname + if new_iname in new_tup[:-1]: + # attempted to join inames with another iname + # in between, error + raise ValueError( + "cannot join inames (%s) involved in legacy " + "loop_priority if another iname is prioritized " + "between them. 
knl.loop_priority: %s" + % (inames, old_loop_priority)) + elif (not new_tup) or new_iname != new_tup[-1]: + new_tup.append(new_iname) + # (if new_iname == new_tup[-1], don't add it twice + else: + new_tup.append(iname) + if len(new_tup) > 1: + new_loop_priority.update([tuple(new_tup)]) + new_loop_priority = frozenset(new_loop_priority) + kernel = (kernel .copy( instructions=new_insns, domains=domch.get_domains_with(new_domain), - applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] + applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict], + loop_priority=new_loop_priority, )) from loopy.match import parse_stack_match @@ -712,6 +1510,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] from loopy.kernel.data import (ConcurrentTag, ForceSequentialTag, + VectorizeTag, filter_iname_tags_by_type) # {{{ globbing @@ -751,10 +1550,21 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - if (isinstance(new_tag, ConcurrentTag) - and filter_iname_tags_by_type(old_tags, ForceSequentialTag)): - raise ValueError("cannot tag '%s' as parallel--" - "iname requires sequential execution" % iname) + if isinstance(new_tag, ConcurrentTag): + if filter_iname_tags_by_type(old_tags, ForceSequentialTag): + raise ValueError("cannot tag '%s' as parallel--" + "iname requires sequential execution" % iname) + + # if iname found in must_nest, fail + if kernel.loop_nest_constraints: + must_nest = kernel.loop_nest_constraints.must_nest + if must_nest: + for nesting in must_nest: + for iname_set in nesting: + if iname in iname_set.inames: + raise ValueError("cannot tag '%s' as concurrent--" + "iname involved in must-nest constraint %s." + % (iname, nesting)) if (isinstance(new_tag, ForceSequentialTag) and filter_iname_tags_by_type(old_tags, ConcurrentTag)): @@ -763,6 +1573,19 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): "(likely because of participation in a precompute or " "a reduction)" % iname) + if isinstance(new_tag, VectorizeTag): + # vec_inames will be nested innermost, + # check whether this conflicts with loop priorities + must_nest_graph = (kernel.loop_nest_constraints.must_nest_graph + if kernel.loop_nest_constraints else None) + if must_nest_graph and must_nest_graph.get(iname, set()): + # iname is not a leaf + raise ValueError( + "Loop priorities provided specify that iname %s nest " + "outside of inames %s, but vectorized inames " + "must nest innermost. Cannot tag %s with 'vec' tag." 
+ % (iname, must_nest_graph.get(iname, set()), iname)) + knl_iname_to_tags[iname] = old_tags | frozenset([new_tag]) return kernel.copy(iname_to_tags=knl_iname_to_tags) @@ -878,10 +1701,33 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, from loopy.kernel.tools import DomainChanger domch = DomainChanger(knl, frozenset([old_iname])) + # update must_nest, must_not_nest, and must_nest_graph + # (don't remove any unused inames yet, that happens later) + knl = replace_inames_in_all_nest_constraints( + knl, set([old_iname, ]), [old_iname, new_iname]) + + # update legacy loop_priority + if knl.loop_priority: + new_loop_priority = [] + for iname_tuple in knl.loop_priority: + try: + idx = iname_tuple.index(old_iname) + new_tuple = list(iname_tuple) + new_tuple[idx] = new_iname + new_tuple = tuple(new_tuple) + except ValueError: + new_tuple = iname_tuple + new_loop_priority.append(new_tuple) + new_loop_priority = frozenset(new_loop_priority) + else: + new_loop_priority = knl.loop_priority + from loopy.isl_helpers import duplicate_axes knl = knl.copy( domains=domch.get_domains_with( - duplicate_axes(domch.domain, [old_iname], [new_iname]))) + duplicate_axes(domch.domain, [old_iname], [new_iname])), + loop_priority=new_loop_priority, + ) # }}} @@ -1112,6 +1958,15 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): "--cannot rename" % new_iname) if does_exist: + + if knl.loop_nest_constraints and ( + knl.loop_nest_constraints.must_nest or + knl.loop_nest_constraints.must_not_nest or + knl.loop_nest_constraints.must_nest_graph): + raise NotImplementedError( + "rename_iname() does not yet handle new loop nest " + "constraints when does_exist=True.") + # {{{ check that the domains match up dom = knl.get_inames_domain(frozenset((old_iname, new_iname))) @@ -1236,6 +2091,9 @@ def remove_unused_inames(knl, inames=None): # }}} + # now need to remove inames from loop priorities + knl = replace_inames_in_all_nest_constraints(knl, unused_inames, []) + return knl @@ -1733,6 +2591,8 @@ def make_reduction_inames_unique(kernel, inames=None, within=None): # {{{ duplicate the inames + # TODO need to update inames in priorities + for old_iname, new_iname in r_uniq.old_to_new: from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, frozenset([old_iname])) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e6ecb4093ad24ceafe521c5379f4d2cd96ea6f52..93848ed78a0ad7ad62c39052de8639743981dd9b 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -118,6 +118,12 @@ def add_dependency(kernel, insn_match, depends_on): return result + +def add_dependencies_v2(knl, new_dependencies): + # TODO implement this function + assert isinstance(new_dependencies, frozenset) + return knl.copy(dependencies=frozenset(knl.dependencies | new_dependencies)) + # }}} diff --git a/test/test_apps.py b/test/test_apps.py index 71029cc9ce408f8e7fa95eaf3b766864c4beee5b..6c201e7770aa5b44d8a5eb126a050272a2168061 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -586,16 +586,14 @@ def test_poisson_fem(ctx_factory): ref_knl = knl - knl = lp.prioritize_loops(knl, ["c", "j", "i", "k"]) + knl = lp.prioritize_loops(knl, ["c", "j", "k", "i"]) def variant_1(knl): knl = lp.precompute(knl, "dpsi", "i,k,ell", default_tag='for') - knl = lp.prioritize_loops(knl, "c,i,j") return knl def variant_2(knl): knl = lp.precompute(knl, "dpsi", "i,ell", default_tag='for') - knl = lp.prioritize_loops(knl, "c,i,j") return knl def 
add_types(knl):

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 520efba9b678f730474433baf92df8d397469f79..2989005610c8b6ec9d8719b2959c3f18ce7e4a0a 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -31,11 +31,14 @@ from pyopencl.tools import (  # noqa
         as pytest_generate_tests)
 from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2  # noqa
 import logging
+from loopy.kernel import KernelState
 from loopy import (
     preprocess_kernel,
     get_one_linearized_kernel,
 )
 
+lp.set_caching_enabled(False)
+
 logger = logging.getLogger(__name__)
 
 try:
@@ -46,7 +49,9 @@ else:
     faulthandler.enable()
 
 
-def test_lexschedule_and_map_creation():
+# {{{ test PairwiseScheduleBuilder and map creation
+
+def test_pairwise_schedule_and_map_creation():
     import islpy as isl
     from loopy.schedule.checker import (
         get_schedule_for_statement_pair,
@@ -400,6 +405,697 @@ def test_lexschedule_and_map_creation():
         # insn_d was linearized first, check schedule accordingly
         perform_insn_cd_checks_with(1, 0)
 
+# }}}
+
+
+# {{{ test statement instance ordering creation
+
+def test_statement_instance_ordering_creation():
+    import islpy as isl
+    from loopy.schedule.checker import (
+        get_schedule_for_statement_pair,
+    )
+    from loopy.schedule.checker.utils import (
+        ensure_dim_names_match_and_align,
+        append_marker_to_isl_map_var_names,
+    )
+    from loopy.schedule.checker.lexicographic_order_map import (
+        get_statement_ordering_map,
+    )
+
+    # example kernel (add deps to fix loop order)
+    knl = lp.make_kernel(
+        [
+            "{[i]: 0<=i<pi}",
+            "{[k]: 0<=k<pk}",
+            "{[j]: 0<=j<pj}",
+            "{[t]: 0<=t<pt}",
+        ],
+        """
+        for i
+            for k
+                <>temp = b[i,k] {id=insn_a}
+            end
+            for j
+                a[i,j] = temp + 1 {id=insn_b,dep=insn_a}
+                c[i,j] = d[i,j] {id=insn_c,dep=insn_b}
+            end
+        end
+        for t
+            e[t] = f[t] {id=insn_d, dep=insn_c}
+        end
+        """,
+        name="example",
+        assumptions="pi,pj,pk,pt >= 1",
+        lang_version=(2018, 2)
+        )
+    knl = lp.add_and_infer_dtypes(
+        knl,
+        {"b": np.float32, "d": np.float32, "f": np.float32})
+    knl = lp.prioritize_loops(knl, "i,k")
+    knl = lp.prioritize_loops(knl, "i,j")
+
+    # get a linearization
+    knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    def check_sio_for_insn_pair(
+            insn_id_before,
+            insn_id_after,
+            expected_lex_order_map,
+            expected_sio,
+            ):
+
+        sched_builder = get_schedule_for_statement_pair(
+            knl,
+            linearization_items,
+            insn_id_before,
+            insn_id_after,
+            )
+
+        # Get two isl maps from the PairwiseScheduleBuilder
+        sched_map_before, sched_map_after = sched_builder.build_maps(knl)
+
+        # get map representing lexicographic ordering
+        sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space()
+
+        assert sched_lex_order_map == expected_lex_order_map
+
+        # create statement instance ordering,
+        # maps each statement instance to all statement instances occurring later
+        sio = get_statement_ordering_map(
+            sched_map_before,
+            sched_map_after,
+            sched_lex_order_map,
+            )
+
+        sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio)
+
+        assert sio_aligned == expected_sio
+
+    expected_lex_order_map = isl.Map("{ "
+        "[_lp_linchk_l0', _lp_linchk_l1', _lp_linchk_l2', _lp_linchk_l3', "
+        "_lp_linchk_l4']"
+        " -> "
+        "[_lp_linchk_l0, _lp_linchk_l1, _lp_linchk_l2, _lp_linchk_l3, "
+        "_lp_linchk_l4]"
+        ":"
+        "("
+        "_lp_linchk_l0' < _lp_linchk_l0 "
+        ") or ("
+
"_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2'= _lp_linchk_l2 and " + "_lp_linchk_l3' < _lp_linchk_l3 " + ") or (" + "_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2'= _lp_linchk_l2 and " + "_lp_linchk_l3'= _lp_linchk_l3 and " + "_lp_linchk_l4' < _lp_linchk_l4" + ")" + "}") + + # Isl ignores these apostrophes, but test would still pass since it ignores + # variable names when checking for equality. Even so, explicitly add apostrophes + # for sanity. + expected_lex_order_map = append_marker_to_isl_map_var_names( + expected_lex_order_map, isl.dim_type.in_, "'") + + # Relationship between insn_a and insn_b --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> { " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_b", expected_lex_order_map, expected_sio) + + # Relationship between insn_a and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> { " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_c", expected_lex_order_map, expected_sio) + + # Relationship between insn_a and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pk] -> { " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, t]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_d", expected_lex_order_map, expected_sio) + + # Relationship between insn_b and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj] -> { " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i, j]:" + "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j]:" + "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j=j']:" + "0 <= i' < pi and 0 <= j' < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_b", "insn_c", expected_lex_order_map, expected_sio) + + # Relationship between insn_b and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> { " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}" + ) + # isl 
ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_b", "insn_d", expected_lex_order_map, expected_sio) + + # Relationship between insn_c and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> { " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_c", "insn_d", expected_lex_order_map, expected_sio) + +# }}} + + +def test_linearization_checker_with_loop_prioritization(): + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_matmul(): + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_nop(): + knl = lp.make_kernel( + [ + "{[b]: 
b_start<=b<b_end}",
+            "{[c]: c_start<=c<c_end}",
+        ],
+        """
+        for b
+            <> c_end = 2
+            for c
+                ... nop
+            end
+        end
+        """,
+        "...",
+        seq_dependencies=True)
+    knl = lp.fix_parameters(knl, dim=3)
+
+    unprocessed_knl = knl.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl, deps)
+
+    # get a linearization to check
+    if knl.state < KernelState.PREPROCESSED:
+        knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    linearization_is_valid = lp.check_linearization_validity(
+        unprocessed_knl, deps, linearization_items)
+    assert linearization_is_valid
+
+
+def test_linearization_checker_with_multi_domain():
+    knl = lp.make_kernel(
+        [
+            "{[i]: 0<=i<ni}",
+            "{[j]: 0<=j<nj}",
+            "{[k]: 0<=k<nk}",
+            "{[x,xx]: 0<=x,xx<nx}",
+        ],
+        """
+        for x,xx
+            for i
+                <>acc = 0 {id=insn0}
+                for j
+                    for k
+                        acc = acc + j + k {id=insn1,dep=insn0}
+                    end
+                end
+            end
+        end
+        """,
+        name="nest_multi_dom",
+        assumptions="ni,nj,nk,nx >= 1",
+        lang_version=(2018, 2)
+        )
+    knl = lp.prioritize_loops(knl, "x,xx,i")
+    knl = lp.prioritize_loops(knl, "i,j")
+    knl = lp.prioritize_loops(knl, "j,k")
+
+    unprocessed_knl = knl.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl, deps)
+
+    # get a linearization to check
+    if knl.state < KernelState.PREPROCESSED:
+        knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    linearization_is_valid = lp.check_linearization_validity(
+        unprocessed_knl, deps, linearization_items)
+    assert linearization_is_valid
+
+
+def test_linearization_checker_with_loop_carried_deps():
+    knl = lp.make_kernel(
+        "{[i]: 0<=i<n}",
+        """
+        <>acc0 = 0 {id=insn0}
+        for i
+            acc0 = acc0 + i {id=insn1,dep=insn0}
+            <>acc2 = acc0 + i {id=insn2,dep=insn1}
+            <>acc3 = acc2 + i {id=insn3,dep=insn2}
+            <>acc4 = acc0 + i {id=insn4,dep=insn1}
+        end
+        """,
+        name="loop_carried_deps",
+        assumptions="n >= 1",
+        lang_version=(2018, 2)
+        )
+
+    unprocessed_knl = knl.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl, deps)
+
+    # get a linearization to check
+    if knl.state < KernelState.PREPROCESSED:
+        knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    linearization_is_valid = lp.check_linearization_validity(
+        unprocessed_knl, deps, linearization_items)
+    assert linearization_is_valid
+
+
+def test_linearization_checker_and_invalid_priority_detection():
+    ref_knl = lp.make_kernel(
+        [
+            "{[h]: 0<=h<nh}",
+            "{[i]: 0<=i<ni}",
+            "{[j]: 0<=j<nj}",
+            "{[k]: 0<=k<nk}",
+        ],
+        """
+        <> acc = 0
+        for h,i,j,k
+            acc = acc + h + i + j + k
+        end
+        """,
+        name="priorities",
+        assumptions="ni,nj,nk,nh >= 1",
+        lang_version=(2018, 2)
+        )
+
+    # no error:
+    knl0 = lp.prioritize_loops(ref_knl, "h,i")
+    knl0 = lp.prioritize_loops(knl0, "i,j")
+    knl0 = lp.prioritize_loops(knl0, "j,k")
+
+    unprocessed_knl = knl0.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl0 = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl0, deps)
+
+    # get a linearization to check
+    if knl0.state < KernelState.PREPROCESSED:
+        knl0 = preprocess_kernel(knl0)
+    knl0 = get_one_linearized_kernel(knl0)
+    linearization_items = knl0.linearization
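+    # note: prioritize_loops now records these chains as must-nest
+    # constraints, and "h,i", "i,j", "j,k" combine into the acyclic
+    # must-nest graph h -> i -> j -> k, so linearization should succeed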
+ + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # no error: + knl1 = lp.prioritize_loops(ref_knl, "h,i,k") + knl1 = lp.prioritize_loops(knl1, "h,j,k") + + unprocessed_knl = knl1.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl1 = lp.add_dependencies_v2( # pylint:disable=no-member + knl1, deps) + + # get a linearization to check + if knl1.state < KernelState.PREPROCESSED: + knl1 = preprocess_kernel(knl1) + knl1 = get_one_linearized_kernel(knl1) + linearization_items = knl1.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # error (cycle): + knl2 = lp.prioritize_loops(ref_knl, "h,i,j") + knl2 = lp.prioritize_loops(knl2, "j,k") + # TODO think about when legacy deps should be updated based on prio changes + + try: + if hasattr(lp, "constrain_loop_nesting"): + knl2 = lp.constrain_loop_nesting(knl2, "k,i") # pylint:disable=no-member + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl2) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl2 = lp.add_dependencies_v2( # pylint:disable=no-member + knl2, deps) + else: + knl2 = lp.prioritize_loops(knl2, "k,i") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl2) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl2 = lp.add_dependencies_v2( # pylint:disable=no-member + knl2, deps) + + unprocessed_knl = knl2.copy() + + # get a linearization to check + if knl2.state < KernelState.PREPROCESSED: + knl2 = preprocess_kernel(knl2) + knl2 = get_one_linearized_kernel(knl2) + linearization_items = knl2.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + # should raise error + assert False + except ValueError as e: + if hasattr(lp, "constrain_loop_nesting"): + assert "cycle detected" in str(e) + else: + assert "invalid priorities" in str(e) + + # error (inconsistent priorities): + knl3 = lp.prioritize_loops(ref_knl, "h,i,j,k") + # TODO think about when legacy deps should be updated based on prio changes + try: + if hasattr(lp, "constrain_loop_nesting"): + knl3 = lp.constrain_loop_nesting( # pylint:disable=no-member + knl3, "h,j,i,k") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl3) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl3 = lp.add_dependencies_v2( # pylint:disable=no-member + knl3, deps) + else: + knl3 = lp.prioritize_loops(knl3, "h,j,i,k") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl3) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl3 = lp.add_dependencies_v2( # pylint:disable=no-member + knl3, deps) + + unprocessed_knl = knl3.copy() + + # get a linearization to check + if knl3.state < KernelState.PREPROCESSED: + knl3 = preprocess_kernel(knl3) + knl3 = get_one_linearized_kernel(knl3) + linearization_items = knl3.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + 
# should raise error + assert False + except ValueError as e: + if hasattr(lp, "constrain_loop_nesting"): + assert "cycle detected" in str(e) + else: + assert "invalid priorities" in str(e) + +# TODO create more kernels with invalid linearizations to test linearization checker + if __name__ == "__main__": if len(sys.argv) > 1: diff --git a/test/test_loopy.py b/test/test_loopy.py index 61a3f167be66f1c99adc3a52473d8edc747479e1..ecb8e6ff0567d4ebd61c158d3acfd7f19314e92f 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -55,6 +55,1326 @@ __all__ = [ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa +def test_new_loop_priority_backward_compatibility(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,ktemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # matmul ------- + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # nop ------- + knl = lp.make_kernel( + [ + "{[b]: b_start<=b c_end = 2 + for c + ... 
nop + end + end + """, + "...", + seq_dependencies=True) + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # multi_domain ------- + knl = lp.make_kernel( + [ + "{[i]: 0<=iacc = 0 {id=insn0} + for j + for k + acc = acc + j + k {id=insn1,dep=insn0} + end + end + end + end + """, + name="nest_multi_dom", + assumptions="ni,nj,nk,nx >= 1", + lang_version=(2018, 2) + ) + knl = lp.prioritize_loops(knl, "x,xx,i") + knl = lp.prioritize_loops(knl, "i,j") + knl = lp.prioritize_loops(knl, "j,k") + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # loop_carried_deps ------- + knl = lp.make_kernel( + "{[i]: 0<=iacc0 = 0 {id=insn0} + for i + acc0 = acc0 + i {id=insn1,dep=insn0} + <>acc2 = acc0 + i {id=insn2,dep=insn1} + <>acc3 = acc2 + i {id=insn3,dep=insn2} + <>acc4 = acc0 + i {id=insn4,dep=insn1} + end + """, + name="loop_carried_deps", + assumptions="n >= 1", + lang_version=(2018, 2) + ) + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_iname_coalescing_in_loop_nest_constraints(): + lp.set_caching_enabled(False) + # without ^this, changing these tests has no effect (cached version gets used) + + def get_sets_of_inames(iname_sets_tuple, iname_universe): + # convert UnexpandedInameSets to sets + sets_of_inames = [] + for iname_set in iname_sets_tuple: + sets_of_inames.append( + iname_set.get_inames_represented(iname_universe)) + return sets_of_inames + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k<1024 }", + ''' + out[g,h,i,j,k] = 2*a[g,h,i,j,k] {id=insn} + ''', + ) + # (join_inames errors if domain bound is variable) + + ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a": np.dtype(np.float32)}) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "h", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "h", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert 
new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "{h, j}", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "{h, j, k}"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", "k"])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "{g, h}", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "{h, j, k}"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", "k"])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "j", "h", "k"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "contains cycle" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "j", "{h, k}"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "contains cycle" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, h}", "j", "{g, k}"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "nestings violate existing must-nest" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_not_nest=("g", "h"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "nestings violate existing must-not-nest" in str(e) + + def test_globals_decl_once_with_multi_subprogram(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -705,8 +2025,16 @@ def test_ilp_loop_bound(ctx_factory): ref_knl = knl - knl = lp.prioritize_loops(knl, "j,i,k") + # before new loop nest constraints/scheduling, + # prioritized inames could also be parallel: + #knl = lp.prioritize_loops(knl, "j,i,k") + #knl = lp.split_iname(knl, "k", 4, inner_tag="ilp") + + # after new loop nest constraints/scheduling... + # TODO this produces a different linearization, something wrong with ILP? 
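+    # must_nest=("i", "j") requires the i loop to nest outside the j loop;
+    # k_outer can only be constrained after the split below creates it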
+ knl = lp.constrain_loop_nesting(knl, must_nest=("i", "j")) knl = lp.split_iname(knl, "k", 4, inner_tag="ilp") + knl = lp.constrain_loop_nesting(knl, must_nest=("i", "k_outer")) lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict( @@ -742,7 +2070,6 @@ def test_slab_decomposition_does_not_double_execute(ctx_factory): knl = ref_knl knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr", outer_tag=outer_tag) - knl = lp.prioritize_loops(knl, "i_outer") a = cl.array.empty(queue, 20, np.float32) a.fill(17) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 54b608a183840cc5d33f1e738f36fc605d16d94a..0ff44bf86b8755e17b0144b4a57f1e0092353218 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -51,6 +51,8 @@ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa @pytest.mark.parametrize("Nq", [7]) @pytest.mark.parametrize("opt_level", [11]) def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa + # TODO linearization search for this kernel finds dead ends, no linearization + 1/0 # TODO this prevents this test from running forever, remove when fixed ctx = ctx_factory() filename = os.path.join(os.path.dirname(__file__), "strongVolumeKernels.f90") @@ -74,7 +76,6 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa set_q_storage_format, set_D_storage_format) hsv = lp.fix_parameters(hsv, Nq=Nq) - hsv = lp.prioritize_loops(hsv, "e,k,j,i") hsv = lp.tag_inames(hsv, dict(e="g.0", j="l.1", i="l.0")) hsv = lp.assume(hsv, "elements >= 1") diff --git a/test/test_transform.py b/test/test_transform.py index ffef893b05fbca5a0d244ff17f379e1bb5cf27a1..ebf6b36f062942440886a6713a30f96ef7c51622 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -66,7 +66,6 @@ def test_chunk_iname(ctx_factory): ref_knl = knl knl = lp.chunk_iname(knl, "i", 3, inner_tag="l.0") - knl = lp.prioritize_loops(knl, "i_outer, i_inner") lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=130)) @@ -445,7 +444,9 @@ def test_precompute_with_preexisting_inames(ctx_factory): knl = lp.precompute(knl, "D2_subst", "i,k", default_tag="for", precompute_inames="ii,jj") - knl = lp.prioritize_loops(knl, "ii,jj,e,j,k") + knl = lp.prioritize_loops(knl, "ii,jj") + knl = lp.prioritize_loops(knl, "e,j") + knl = lp.prioritize_loops(knl, "e,k") lp.auto_test_vs_ref( ref_knl, ctx, knl,