From 5465ba4e3893771465a8f1da23197cc6cf8aa533 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 13 Jan 2018 18:13:03 -0600 Subject: [PATCH 01/17] First version of iname with deps --- loopy/transform/iname.py | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index cd8ec409c..869b053c5 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -763,6 +763,63 @@ class _InameDuplicator(RuleAwareIdentityMapper): for iname in insn.within_inames) return insn.copy(within_inames=new_fid) +def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, + tags={}): + """ + This will duplicate the particular iname in all the insns that are + dependendent on the current insn or even that depend on the current insn. + The working of this is very simple, it just calls + :func:`lp.duplicate_inames`. + + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ + from loopy.match import parse_stack_match + insns = parse_stack_match(within) + + from loopy.kernel.tools import find_reverse_dependencies + + rev_depends = frozenset([]) + depends = frozenset([]) + + from collections import deque + + q = deque(frozenset([insns])) + q_rev = deque(find_reverse_dependencies(knl, insns)) + + # This is a graph traversal + while len(q) != 0: + elem = q.pop() + # TODO: need to convert the id to insn + elem_insn = elem.copy() + if iname in elem_insn.within_inames: + depends.add(elem) + for e in elem.depends_on: + # adding the deps of the current insn into the queue so that + # particular insn could also be visited + q.append(e) + + while len(q_rev) != 0: + elem = q.pop() + # TODO: need to convert the id to insn + elem_insn = elem.copy() + if iname in elem_insn.within_inames: + rev_depends.add(elem) + for e in find_reverse_dependencies(knl, "id:"+str(elem)): + # adding the parent deps of the current insn into the queue so + # that particular insn could also be visited + q_rev.append(e) + + # Making a union of both these sets. + # As it has been realized that we would need to do that for both of those. + + q != q_rev + + for k in q: + knl = duplicate_inames(knl, "id"+str(k), new_iname, suffix, tags) + + return knl + def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, tags={}): -- GitLab From f205214763a0016def83064c320ef92aa2f84b13 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 13 Jan 2018 18:26:26 -0600 Subject: [PATCH 02/17] made changes from insn_id to actual insn in the functino --- loopy/transform/iname.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 869b053c5..4e0f30d10 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -790,8 +790,7 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, # This is a graph traversal while len(q) != 0: elem = q.pop() - # TODO: need to convert the id to insn - elem_insn = elem.copy() + elem_insn = knl.id_to_insn(elem) if iname in elem_insn.within_inames: depends.add(elem) for e in elem.depends_on: @@ -801,8 +800,7 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, while len(q_rev) != 0: elem = q.pop() - # TODO: need to convert the id to insn - elem_insn = elem.copy() + elem_insn = knl.id_to_insn(elem) if iname in elem_insn.within_inames: rev_depends.add(elem) for e in find_reverse_dependencies(knl, "id:"+str(elem)): -- GitLab From 268dd8de733d0553e0f8beda98170fef5ea9b8a1 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 13 Jan 2018 19:25:09 -0600 Subject: [PATCH 03/17] Got started with checking whether an option needs expensive duplication. Need something equivalent to `node.free_indices` --- loopy/transform/iname.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4e0f30d10..c160d7fd0 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -763,8 +763,28 @@ class _InameDuplicator(RuleAwareIdentityMapper): for iname in insn.within_inames) return insn.copy(within_inames=new_fid) + +def need_expensive_duplication(knl, iname, within): + """ + This function checks whether we would need expensive duplication instead of + our normal free duplication. This would check the free iname associated + with the instruction. + + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ + from loopy.match import parse_stack_match + insns = parse_stack_match(within) + + assert len(insns) == 1, ("There has to be only 1 match with the function" + " given %s to perform the check. Please try to be" + " more specific.") + + # TODO: this is a BIG todo. + + def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, - tags={}): + tags={}): """ This will duplicate the particular iname in all the insns that are dependendent on the current insn or even that depend on the current insn. -- GitLab From 6f29e9172aaf75ba2bfc43f711f29a90ec722d89 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 17 Jan 2018 05:00:28 -0600 Subject: [PATCH 04/17] Wrote a function to check that the iname lies within the indices --- loopy/transform/iname.py | 41 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c160d7fd0..849b0c7b4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -764,7 +764,7 @@ class _InameDuplicator(RuleAwareIdentityMapper): return insn.copy(within_inames=new_fid) -def need_expensive_duplication(knl, iname, within): +def need_duplication_with_deps(knl, iname, within): """ This function checks whether we would need expensive duplication instead of our normal free duplication. This would check the free iname associated @@ -773,6 +773,7 @@ def need_expensive_duplication(knl, iname, within): :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. """ + from pymbolic.primitves import (Subscript, Variable) from loopy.match import parse_stack_match insns = parse_stack_match(within) @@ -780,7 +781,43 @@ def need_expensive_duplication(knl, iname, within): " given %s to perform the check. Please try to be" " more specific.") - # TODO: this is a BIG todo. + # Getting the instruction of interest + insn = knl.id_to_insn(insns[0]) + + # Handling the LHS + lhs = insn.assignee + if not isinstance(lhs, Subscript): + return True + if Variable(iname) not in lhs.index: + return True + + # The LHS is now compliant with free duplication + # analyzing the RHS now + rhs = insn.expression + import deque + rhs_queue = deque() + rhs_queue.append(rhs) + + while len(rhs_queue) != 0: + elem = rhs_queue.pop() + if isinstance(elem, Variable): + # encountered non-indexed variable and hence needs dupl_with_deps + return True + elif isinstance(elem, Subscript): + if not Variable(iname) in elem.index: + # iname not in indices needs dupl_with_deps + return True + elif 'children' in dir(elem): + # the element is also an expression + for child in elem.children: + rhs_queue.append(child) + else: + # these are the ints, floats in the expression and hence ignored + pass + + # all the reason needed for the expensive duplication have been negated, + # hence we can go ahead and do free duplication + return False def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, -- GitLab From 6a74a379df747eccdcfe0e0b6fc875abc3434b65 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 17 Jan 2018 05:24:38 -0600 Subject: [PATCH 05/17] Added check function for isomorphic duplication --- loopy/transform/iname.py | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 849b0c7b4..9c818f264 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -820,17 +820,17 @@ def need_duplication_with_deps(knl, iname, within): return False -def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, - tags={}): - """ - This will duplicate the particular iname in all the insns that are - dependendent on the current insn or even that depend on the current insn. - The working of this is very simple, it just calls - :func:`lp.duplicate_inames`. +def is_isomorphic_duplication(knl, iname, within): + insns_related_to_inames = frozenset([]) + for insn in knl.instruction: + insns_related_to_inames |= insn.within_inames - :arg within: a stack match as understood by - :func:`loopy.match.parse_stack_match`. - """ + forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, insn) + + return forward_and_rev_deps == insns_related_to_inames + + +def get_forward_and_rev_deps(knl, iname, within): from loopy.match import parse_stack_match insns = parse_stack_match(within) @@ -867,10 +867,26 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, # Making a union of both these sets. # As it has been realized that we would need to do that for both of those. + return q | q_rev + +def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, + tags={}): + """ + This will duplicate the particular iname in all the insns that are + dependendent on the current insn or even that depend on the current insn. + The working of this is very simple, it just calls + :func:`lp.duplicate_inames`. + + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ + + + q_forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) q != q_rev - for k in q: + for k in q_forward_and_rev_deps: knl = duplicate_inames(knl, "id"+str(k), new_iname, suffix, tags) return knl -- GitLab From 4c55bc56b3229d2267310c5fcc679201ede0bed0 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 17 Jan 2018 13:04:15 -0600 Subject: [PATCH 06/17] Fixed the error about not updating frozensets in the newly created functions. --- loopy/transform/iname.py | 72 +++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 9c818f264..d75b51ca9 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -50,6 +50,8 @@ __doc__ = """ .. autofunction:: get_iname_duplication_options +.. autofunction:: need_duplication_with_deps + .. autofunction:: has_schedulable_iname_nesting .. autofunction:: prioritize_loops @@ -773,28 +775,30 @@ def need_duplication_with_deps(knl, iname, within): :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. """ - from pymbolic.primitves import (Subscript, Variable) + from pymbolic.primitives import (Subscript, Variable) from loopy.match import parse_stack_match insns = parse_stack_match(within) - assert len(insns) == 1, ("There has to be only 1 match with the function" - " given %s to perform the check. Please try to be" - " more specific.") - # Getting the instruction of interest - insn = knl.id_to_insn(insns[0]) + for insn in knl.instructions: + if insns(knl, insn, ()): + break # Handling the LHS lhs = insn.assignee if not isinstance(lhs, Subscript): return True - if Variable(iname) not in lhs.index: - return True + if isinstance(lhs.index, Variable): + if Variable(iname) != lhs.index: + return True + else: + if Variable(iname) not in lhs.index: + return True # The LHS is now compliant with free duplication # analyzing the RHS now rhs = insn.expression - import deque + from collections import deque rhs_queue = deque() rhs_queue.append(rhs) @@ -804,9 +808,13 @@ def need_duplication_with_deps(knl, iname, within): # encountered non-indexed variable and hence needs dupl_with_deps return True elif isinstance(elem, Subscript): - if not Variable(iname) in elem.index: - # iname not in indices needs dupl_with_deps - return True + if isinstance(elem.index, Variable): + if Variable(iname) != elem.index: + return True + else: + if not Variable(iname) in elem.index: + # iname not in indices needs dupl_with_deps + return True elif 'children' in dir(elem): # the element is also an expression for child in elem.children: @@ -821,18 +829,28 @@ def need_duplication_with_deps(knl, iname, within): def is_isomorphic_duplication(knl, iname, within): + # intended to store all the insn ids related to a given iname insns_related_to_inames = frozenset([]) - for insn in knl.instruction: - insns_related_to_inames |= insn.within_inames + for insn in knl.instructions: + if iname in insn.within_inames: + insns_related_to_inames |= frozenset([insn.id]) - forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, insn) + forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) return forward_and_rev_deps == insns_related_to_inames def get_forward_and_rev_deps(knl, iname, within): - from loopy.match import parse_stack_match - insns = parse_stack_match(within) + from loopy.kernel.instruction import Assignment + + if isinstance(within, Assignment): + insn = within + else: + from loopy.match import parse_stack_match + insns = parse_stack_match(within) + for insn in knl.instructions: + if insns(knl, insn, ()): + break from loopy.kernel.tools import find_reverse_dependencies @@ -841,16 +859,19 @@ def get_forward_and_rev_deps(knl, iname, within): from collections import deque - q = deque(frozenset([insns])) - q_rev = deque(find_reverse_dependencies(knl, insns)) + insn_id = insn.id + # initializes the queue of insn ids for forwards dependencies + q = deque(frozenset([insn_id])) + # initializes the queue of insn ids of the reverse dependencies + q_rev = deque(find_reverse_dependencies(knl, frozenset([insn]))) # This is a graph traversal while len(q) != 0: elem = q.pop() - elem_insn = knl.id_to_insn(elem) + elem_insn = knl.id_to_insn[elem] if iname in elem_insn.within_inames: - depends.add(elem) - for e in elem.depends_on: + depends |= frozenset([elem]) + for e in elem_insn.depends_on: # adding the deps of the current insn into the queue so that # particular insn could also be visited q.append(e) @@ -859,7 +880,7 @@ def get_forward_and_rev_deps(knl, iname, within): elem = q.pop() elem_insn = knl.id_to_insn(elem) if iname in elem_insn.within_inames: - rev_depends.add(elem) + rev_depends |= frozenset([elem]) for e in find_reverse_dependencies(knl, "id:"+str(elem)): # adding the parent deps of the current insn into the queue so # that particular insn could also be visited @@ -867,7 +888,7 @@ def get_forward_and_rev_deps(knl, iname, within): # Making a union of both these sets. # As it has been realized that we would need to do that for both of those. - return q | q_rev + return depends | rev_depends def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, tags={}): @@ -881,11 +902,8 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, :func:`loopy.match.parse_stack_match`. """ - q_forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) - q != q_rev - for k in q_forward_and_rev_deps: knl = duplicate_inames(knl, "id"+str(k), new_iname, suffix, tags) -- GitLab From 3ee0062eb7c7c7c74b60a17b815c01d3bebdc667 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 19 Jan 2018 19:37:44 -0600 Subject: [PATCH 07/17] Cleaning touches to `iname.py` --- loopy/transform/iname.py | 51 +++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index d75b51ca9..b821e2b27 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -766,11 +766,11 @@ class _InameDuplicator(RuleAwareIdentityMapper): return insn.copy(within_inames=new_fid) -def need_duplication_with_deps(knl, iname, within): +def needs_duplication_with_deps(knl, iname, within): """ - This function checks whether we would need expensive duplication instead of - our normal free duplication. This would check the free iname associated - with the instruction. + This function checks whether we would need non-trivial duplication instead + of our normal free duplication. This would check the indices of the + associated variables in the instructions :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. @@ -829,6 +829,17 @@ def need_duplication_with_deps(knl, iname, within): def is_isomorphic_duplication(knl, iname, within): + """ + This function checks whether the duplication of the given iname would solve + any purpose. This should be a check for the user to perform before + performing the :func:`loopy.duplicate_iname_with_deps`. If the function + returns `True`, that is a signal to **NOT** perform the duplication, and + instead explore other available options. + + + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ # intended to store all the insn ids related to a given iname insns_related_to_inames = frozenset([]) for insn in knl.instructions: @@ -841,6 +852,18 @@ def is_isomorphic_duplication(knl, iname, within): def get_forward_and_rev_deps(knl, iname, within): + """ + This function is typically not needed from the perspective of a user. The + main objective of the function would be to find out all the forward and + reverse dependencies that are needed for for the duplication of an iname + in a instruction. Returns a `frozenset` of forward and backward + dependencies of an instruction which have the particular `iname` as one of + their `within_inames` + + + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ from loopy.kernel.instruction import Assignment if isinstance(within, Assignment): @@ -863,7 +886,7 @@ def get_forward_and_rev_deps(knl, iname, within): # initializes the queue of insn ids for forwards dependencies q = deque(frozenset([insn_id])) # initializes the queue of insn ids of the reverse dependencies - q_rev = deque(find_reverse_dependencies(knl, frozenset([insn]))) + q_rev = deque(find_reverse_dependencies(knl, frozenset([insn_id]))) # This is a graph traversal while len(q) != 0: @@ -877,11 +900,11 @@ def get_forward_and_rev_deps(knl, iname, within): q.append(e) while len(q_rev) != 0: - elem = q.pop() - elem_insn = knl.id_to_insn(elem) + elem = q_rev.pop() + elem_insn = knl.id_to_insn[elem] if iname in elem_insn.within_inames: rev_depends |= frozenset([elem]) - for e in find_reverse_dependencies(knl, "id:"+str(elem)): + for e in find_reverse_dependencies(knl, frozenset([elem])): # adding the parent deps of the current insn into the queue so # that particular insn could also be visited q_rev.append(e) @@ -890,6 +913,7 @@ def get_forward_and_rev_deps(knl, iname, within): # As it has been realized that we would need to do that for both of those. return depends | rev_depends + def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, tags={}): """ @@ -897,15 +921,24 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, dependendent on the current insn or even that depend on the current insn. The working of this is very simple, it just calls :func:`lp.duplicate_inames`. + .. note:: + Currently this would only take a single iname for duplicating. For + multiple inames please use the same function multiple number of times. :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. """ + # Getting the relevant forward and backward dependencies. + # On these instructions we need to perform the duplication q_forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) for k in q_forward_and_rev_deps: - knl = duplicate_inames(knl, "id"+str(k), new_iname, suffix, tags) + # duplicating single instruction at a time + from loopy.match import Id, Or + insn_match_or = Or(tuple([Id(k)])) + knl = duplicate_inames(knl, [iname], insn_match_or, + new_iname, suffix, tags) return knl -- GitLab From 2e59341ad45a11a7394e92bc10dcdc87660222cc Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 19 Jan 2018 20:06:01 -0600 Subject: [PATCH 08/17] Added the test for this branch --- test/test_loopy.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index e624ed346..5a33ab8a5 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1792,6 +1792,74 @@ def test_unschedulable_kernel_detection(): assert len(list(lp.get_iname_duplication_options(knl))) == 10 +def test_duplication_options_with_deps(ctx_factory): + import loopy as lp + from loopy.transform.iname import (needs_duplication_with_deps, + is_isomorphic_duplication, + duplicate_inames_with_deps) + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + n = 2**8 + import pyopencl.clrandom as cl_random + a_mat_dev = cl_random.rand(queue, (n, n), dtype=np.float32) + b_mat_dev = cl_random.rand(queue, (n, n), dtype=np.float32) + c_mat_dev = cl.array.zeros(queue, (n, n), dtype=np.float32) + + knl = lp.make_kernel( + "{ [i, j, k, k1]: 0<=i, j, k, k1<256 }", + """ + temp_cnst[k] = 2.0 {id=insn_1} + temp_cnst_2[k1] = 2*temp_cnst[k1] {id=insn_4} + c[i, j] = reduce(sum, k, temp_cnst[k]*a[i,k]*b[k,j]) {id=insn_2} + c[i, j] = reduce(sum, k1, temp_cnst_2[k1]*a[i,k1]*b[k1,j]) {id=insn_3} + """, + [lp.TemporaryVariable("temp_cnst", + dtype=np.float32, + shape=lp.auto, + base_indices=lp.auto, + scope=lp.temp_var_scope.PRIVATE), + lp.TemporaryVariable("temp_cnst_2", + dtype=np.float32, + shape=lp.auto, + base_indices=lp.auto, + scope=lp.temp_var_scope.PRIVATE), + '...'] + ) + knl = lp.set_options(knl, ignore_boostable_into=True) + knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, + b=np.float32, + c=np.float32)) + processed_knl = lp.preprocess.preprocess_kernel(knl) + assert not lp.has_schedulable_iname_nesting(processed_knl) + fixed_knl = processed_knl.copy() + while not lp.has_schedulable_iname_nesting(fixed_knl): + dupls_list = list(lp.get_iname_duplication_options(fixed_knl)) + for iname, insn in dupls_list: + if needs_duplication_with_deps(fixed_knl, iname, insn): + if not is_isomorphic_duplication(fixed_knl, iname, insn): + fixed_knl = duplicate_inames_with_deps(fixed_knl, + iname, insn) + break + else: + fixed_knl = lp.duplicate_inames(fixed_knl, iname, insn) + break + assert lp.has_schedulable_iname_nesting(fixed_knl) + knl = fixed_knl + knl = lp.set_options(knl, "write_cl") + kernel_args = {} + kernel_args['a'] = a_mat_dev + kernel_args['b'] = b_mat_dev + kernel_args['c'] = c_mat_dev + + evt, (out,) = knl(queue, **kernel_args) + + a = a_mat_dev.get() + b = b_mat_dev.get() + c = out.get() + + assert np.linalg.norm(4*a.dot(b)-c)/np.linalg.norm(4*a.dot(b)) < 1e-7 + + def test_regression_no_ret_call_removal(ctx_factory): # https://github.com/inducer/loopy/issues/32 knl = lp.make_kernel( -- GitLab From bf540050a2df4fa1bb1506c1ca36124e9dd009b1 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 19 Jan 2018 23:07:54 -0600 Subject: [PATCH 09/17] Remove the incosistency in the test function --- test/test_loopy.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 5a33ab8a5..9e57aa26e 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1809,8 +1809,7 @@ def test_duplication_options_with_deps(ctx_factory): "{ [i, j, k, k1]: 0<=i, j, k, k1<256 }", """ temp_cnst[k] = 2.0 {id=insn_1} - temp_cnst_2[k1] = 2*temp_cnst[k1] {id=insn_4} - c[i, j] = reduce(sum, k, temp_cnst[k]*a[i,k]*b[k,j]) {id=insn_2} + temp_cnst_2[k1] = 2*temp_cnst[k1] {id=insn_2} c[i, j] = reduce(sum, k1, temp_cnst_2[k1]*a[i,k1]*b[k1,j]) {id=insn_3} """, [lp.TemporaryVariable("temp_cnst", @@ -1830,6 +1829,8 @@ def test_duplication_options_with_deps(ctx_factory): b=np.float32, c=np.float32)) processed_knl = lp.preprocess.preprocess_kernel(knl) + + # Assert that earlier it was not schedulable assert not lp.has_schedulable_iname_nesting(processed_knl) fixed_knl = processed_knl.copy() while not lp.has_schedulable_iname_nesting(fixed_knl): @@ -1843,6 +1844,8 @@ def test_duplication_options_with_deps(ctx_factory): else: fixed_knl = lp.duplicate_inames(fixed_knl, iname, insn) break + + # Assert that after the duplication, everything works assert lp.has_schedulable_iname_nesting(fixed_knl) knl = fixed_knl knl = lp.set_options(knl, "write_cl") -- GitLab From e6bf165e2031d91fb6fc4c28aaf6d4a8f1446e07 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 21 Jan 2018 01:36:07 -0600 Subject: [PATCH 10/17] Minor fixes after the review. Major fix coming through! --- loopy/transform/iname.py | 226 ++++++++++++++++++++------------------- 1 file changed, 114 insertions(+), 112 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index b821e2b27..d6adc7ca6 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -766,11 +766,103 @@ class _InameDuplicator(RuleAwareIdentityMapper): return insn.copy(within_inames=new_fid) +def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, + tags={}): + """ + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ + + # {{{ normalize arguments, find unique new_inames + + if isinstance(inames, str): + inames = [iname.strip() for iname in inames.split(",")] + + if isinstance(new_inames, str): + new_inames = [iname.strip() for iname in new_inames.split(",")] + + from loopy.match import parse_stack_match + within = parse_stack_match(within) + + if new_inames is None: + new_inames = [None] * len(inames) + + if len(new_inames) != len(inames): + raise ValueError("new_inames must have the same number of entries as inames") + + name_gen = knl.get_var_name_generator() + + for i, iname in enumerate(inames): + new_iname = new_inames[i] + + if new_iname is None: + new_iname = iname + + if suffix is not None: + new_iname += suffix + + new_iname = name_gen(new_iname) + + else: + if name_gen.is_name_conflicting(new_iname): + raise ValueError("new iname '%s' conflicts with existing names" + % new_iname) + + name_gen.add_name(new_iname) + + new_inames[i] = new_iname + + # }}} + + # {{{ duplicate the inames + + for old_iname, new_iname in zip(inames, new_inames): + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(knl, frozenset([old_iname])) + + from loopy.isl_helpers import duplicate_axes + knl = knl.copy( + domains=domch.get_domains_with( + duplicate_axes(domch.domain, [old_iname], [new_iname]))) + + # }}} + + # {{{ change the inames in the code + + rule_mapping_context = SubstitutionRuleMappingContext( + knl.substitutions, name_gen) + indup = _InameDuplicator(rule_mapping_context, + old_to_new=dict(list(zip(inames, new_inames))), + within=within) + + knl = rule_mapping_context.finish_kernel( + indup.map_kernel(knl)) + + # }}} + + # {{{ realize tags + + for old_iname, new_iname in zip(inames, new_inames): + new_tag = tags.get(old_iname) + if new_tag is not None: + knl = tag_inames(knl, {new_iname: new_tag}) + + # }}} + + return knl + +# }}} + + +# {{{ iname duplication with dependencies def needs_duplication_with_deps(knl, iname, within): """ - This function checks whether we would need non-trivial duplication instead - of our normal free duplication. This would check the indices of the - associated variables in the instructions + ********************************************** + ********************************************** + --FIX THIS-- + This needs an example and proper explanation + ********************************************** + ********************************************** :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. @@ -821,7 +913,8 @@ def needs_duplication_with_deps(knl, iname, within): rhs_queue.append(child) else: # these are the ints, floats in the expression and hence ignored - pass + from numbers import Number + assert isinstance(elem, Number) # all the reason needed for the expensive duplication have been negated, # hence we can go ahead and do free duplication @@ -830,12 +923,10 @@ def needs_duplication_with_deps(knl, iname, within): def is_isomorphic_duplication(knl, iname, within): """ - This function checks whether the duplication of the given iname would solve - any purpose. This should be a check for the user to perform before - performing the :func:`loopy.duplicate_iname_with_deps`. If the function - returns `True`, that is a signal to **NOT** perform the duplication, and - instead explore other available options. - + This is a check for the user to perform before using the + :func:`loopy.duplicate_iname_with_deps`. If the function returns `True`, + that is a signal to **NOT** perform the duplication, and instead explore + other available options. :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. @@ -844,7 +935,8 @@ def is_isomorphic_duplication(knl, iname, within): insns_related_to_inames = frozenset([]) for insn in knl.instructions: if iname in insn.within_inames: - insns_related_to_inames |= frozenset([insn.id]) + insns_related_to_inames = (insns_related_to_inames | + frozenset([insn.id])) forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) @@ -853,13 +945,9 @@ def is_isomorphic_duplication(knl, iname, within): def get_forward_and_rev_deps(knl, iname, within): """ - This function is typically not needed from the perspective of a user. The - main objective of the function would be to find out all the forward and - reverse dependencies that are needed for for the duplication of an iname - in a instruction. Returns a `frozenset` of forward and backward - dependencies of an instruction which have the particular `iname` as one of - their `within_inames` - + Returns a `frozenset` of forward and backward dependencies of an + instruction which have the particular `iname` as one of their + `within_inames`. :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. @@ -870,9 +958,9 @@ def get_forward_and_rev_deps(knl, iname, within): insn = within else: from loopy.match import parse_stack_match - insns = parse_stack_match(within) + within = parse_stack_match(within) for insn in knl.instructions: - if insns(knl, insn, ()): + if within(knl, insn, ()): break from loopy.kernel.tools import find_reverse_dependencies @@ -889,21 +977,21 @@ def get_forward_and_rev_deps(knl, iname, within): q_rev = deque(find_reverse_dependencies(knl, frozenset([insn_id]))) # This is a graph traversal - while len(q) != 0: + while q: elem = q.pop() elem_insn = knl.id_to_insn[elem] if iname in elem_insn.within_inames: - depends |= frozenset([elem]) + depends = depends | frozenset([elem]) for e in elem_insn.depends_on: # adding the deps of the current insn into the queue so that # particular insn could also be visited q.append(e) - while len(q_rev) != 0: + while q_rev: elem = q_rev.pop() elem_insn = knl.id_to_insn[elem] if iname in elem_insn.within_inames: - rev_depends |= frozenset([elem]) + rev_depends = rev_depends | frozenset([elem]) for e in find_reverse_dependencies(knl, frozenset([elem])): # adding the parent deps of the current insn into the queue so # that particular insn could also be visited @@ -935,99 +1023,13 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, for k in q_forward_and_rev_deps: # duplicating single instruction at a time - from loopy.match import Id, Or - insn_match_or = Or(tuple([Id(k)])) + from loopy.match import Id + insn_match_or = Id(k) knl = duplicate_inames(knl, [iname], insn_match_or, new_iname, suffix, tags) return knl - -def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, - tags={}): - """ - :arg within: a stack match as understood by - :func:`loopy.match.parse_stack_match`. - """ - - # {{{ normalize arguments, find unique new_inames - - if isinstance(inames, str): - inames = [iname.strip() for iname in inames.split(",")] - - if isinstance(new_inames, str): - new_inames = [iname.strip() for iname in new_inames.split(",")] - - from loopy.match import parse_stack_match - within = parse_stack_match(within) - - if new_inames is None: - new_inames = [None] * len(inames) - - if len(new_inames) != len(inames): - raise ValueError("new_inames must have the same number of entries as inames") - - name_gen = knl.get_var_name_generator() - - for i, iname in enumerate(inames): - new_iname = new_inames[i] - - if new_iname is None: - new_iname = iname - - if suffix is not None: - new_iname += suffix - - new_iname = name_gen(new_iname) - - else: - if name_gen.is_name_conflicting(new_iname): - raise ValueError("new iname '%s' conflicts with existing names" - % new_iname) - - name_gen.add_name(new_iname) - - new_inames[i] = new_iname - - # }}} - - # {{{ duplicate the inames - - for old_iname, new_iname in zip(inames, new_inames): - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(knl, frozenset([old_iname])) - - from loopy.isl_helpers import duplicate_axes - knl = knl.copy( - domains=domch.get_domains_with( - duplicate_axes(domch.domain, [old_iname], [new_iname]))) - - # }}} - - # {{{ change the inames in the code - - rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, name_gen) - indup = _InameDuplicator(rule_mapping_context, - old_to_new=dict(list(zip(inames, new_inames))), - within=within) - - knl = rule_mapping_context.finish_kernel( - indup.map_kernel(knl)) - - # }}} - - # {{{ realize tags - - for old_iname, new_iname in zip(inames, new_inames): - new_tag = tags.get(old_iname) - if new_tag is not None: - knl = tag_inames(knl, {new_iname: new_tag}) - - # }}} - - return knl - # }}} -- GitLab From cbefdf9f8d73088b1644215e7d140c64025538b7 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 21 Jan 2018 22:37:20 -0600 Subject: [PATCH 11/17] Done some major changes in `iname_duplication_with_deps`, so that it gives out correct transformation --- loopy/transform/iname.py | 58 +++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index d6adc7ca6..c351638ef 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -964,42 +964,33 @@ def get_forward_and_rev_deps(knl, iname, within): break from loopy.kernel.tools import find_reverse_dependencies - - rev_depends = frozenset([]) - depends = frozenset([]) - from collections import deque - insn_id = insn.id - # initializes the queue of insn ids for forwards dependencies - q = deque(frozenset([insn_id])) - # initializes the queue of insn ids of the reverse dependencies - q_rev = deque(find_reverse_dependencies(knl, frozenset([insn_id]))) + # Initializing for the traversal + forw_rev_depends = set([]) + has_or_will_visit = set([insn.id]) + q = deque(frozenset([insn.id])) - # This is a graph traversal while q: - elem = q.pop() - elem_insn = knl.id_to_insn[elem] + elem_id = q.pop() + elem_insn = knl.id_to_insn[elem_id] if iname in elem_insn.within_inames: - depends = depends | frozenset([elem]) + forw_rev_depends.add(elem_id) + + # adding the forward dependencies for e in elem_insn.depends_on: - # adding the deps of the current insn into the queue so that - # particular insn could also be visited - q.append(e) + # checking that it has already not been visited/scheduled + if e not in has_or_will_visit: + q.append(e) + has_or_will_visit.add(e) - while q_rev: - elem = q_rev.pop() - elem_insn = knl.id_to_insn[elem] - if iname in elem_insn.within_inames: - rev_depends = rev_depends | frozenset([elem]) - for e in find_reverse_dependencies(knl, frozenset([elem])): - # adding the parent deps of the current insn into the queue so - # that particular insn could also be visited - q_rev.append(e) + for e in find_reverse_dependencies(knl, frozenset([elem_id])): + # checking that it has already not been visited/scheduled + if e not in has_or_will_visit: + q.append(e) + has_or_will_visit.add(e) - # Making a union of both these sets. - # As it has been realized that we would need to do that for both of those. - return depends | rev_depends + return forw_rev_depends def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, @@ -1021,12 +1012,11 @@ def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, # On these instructions we need to perform the duplication q_forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) - for k in q_forward_and_rev_deps: - # duplicating single instruction at a time - from loopy.match import Id - insn_match_or = Id(k) - knl = duplicate_inames(knl, [iname], insn_match_or, - new_iname, suffix, tags) + from loopy.match import Id, Or + # Making the Or object so that it could be fed to `duplicate_inames` + insns_match_or = Or(tuple(Id(k) for k in q_forward_and_rev_deps)) + knl = duplicate_inames(knl, [iname], insns_match_or, + new_iname, suffix, tags) return knl -- GitLab From d7f44422f39949db429f9ef42b73b6598ecf4eaa Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 23 Jan 2018 16:33:35 -0600 Subject: [PATCH 12/17] Improves documentation support and re-orders the function to improve readability. --- loopy/transform/iname.py | 110 +++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 34 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c351638ef..f41aebdca 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -857,12 +857,11 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # {{{ iname duplication with dependencies def needs_duplication_with_deps(knl, iname, within): """ - ********************************************** - ********************************************** - --FIX THIS-- - This needs an example and proper explanation - ********************************************** - ********************************************** + The :func:`lp.duplicate_inames` might fail when the instruction to be + duplicated has dependencies associated with it. This function acts as a + check between :func:`loop.duplicate_inames` and + :func:`loopy.duplicate_inames_with_deps`. + :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. @@ -921,28 +920,6 @@ def needs_duplication_with_deps(knl, iname, within): return False -def is_isomorphic_duplication(knl, iname, within): - """ - This is a check for the user to perform before using the - :func:`loopy.duplicate_iname_with_deps`. If the function returns `True`, - that is a signal to **NOT** perform the duplication, and instead explore - other available options. - - :arg within: a stack match as understood by - :func:`loopy.match.parse_stack_match`. - """ - # intended to store all the insn ids related to a given iname - insns_related_to_inames = frozenset([]) - for insn in knl.instructions: - if iname in insn.within_inames: - insns_related_to_inames = (insns_related_to_inames | - frozenset([insn.id])) - - forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) - - return forward_and_rev_deps == insns_related_to_inames - - def get_forward_and_rev_deps(knl, iname, within): """ Returns a `frozenset` of forward and backward dependencies of an @@ -969,7 +946,7 @@ def get_forward_and_rev_deps(knl, iname, within): # Initializing for the traversal forw_rev_depends = set([]) has_or_will_visit = set([insn.id]) - q = deque(frozenset([insn.id])) + q = deque([insn.id]) while q: elem_id = q.pop() @@ -993,19 +970,84 @@ def get_forward_and_rev_deps(knl, iname, within): return forw_rev_depends +def is_isomorphic_duplication(knl, iname, within): + """ + This is a check for the user to perform before using the + :func:`loopy.duplicate_iname_with_deps`. If the function returns `True`, + that is a signal to **NOT** perform the duplication, and instead explore + other available options. + + :arg within: a stack match as understood by + :func:`loopy.match.parse_stack_match`. + """ + # intended to store all the insn ids related to a given iname + insns_related_to_inames = set() + for insn in knl.instructions: + if iname in insn.within_inames: + insns_related_to_inames.add(insn.id) + + forward_and_rev_deps = get_forward_and_rev_deps(knl, iname, within) + + return forward_and_rev_deps == frozenset(insns_related_to_inames) + + def duplicate_inames_with_deps(knl, iname, within, new_iname=None, suffix=None, tags={}): """ - This will duplicate the particular iname in all the insns that are - dependendent on the current insn or even that depend on the current insn. - The working of this is very simple, it just calls - :func:`lp.duplicate_inames`. + This transformation will duplicate the particular iname in all the insns + that are dependendent on the current insn or even that the current insn + depends on. + + One must implent this variant of duplication over + :func:`loopy.duplicate_inames` if :func:`loopy.needs_duplication_with_deps` + returns `True`. + + As an example consider: + There are 7 instructions[a, b, c, d, e, f, g], with the dependency + graph [Parents at the top and children at the bottom] as: + ``` + a + / \ + b c + / \ \ + d e f + \ + g + ``` + And the `within_inames` for the instructions are as follows: + ``` + a -> i, j, k + b -> i, j + c -> i + d -> i, m + e -> p, q + f -> x, y + g -> i, j + ``` + If we need to duplicate `i` in `b`. + + - Consider the children of `b`: `d` and `e`. We must duplicate `i` in `d`, + but we need not duplicate `i` in `e`, as `i` is not in the `within_iname` + of `e`. + + - Consider the parents of `b`: `a` It has `i` in its `inames` hence + duplication is necessary. + + - Hence, now have figured that we also have to duplicate `i` in the + instructions `d` and `a` as well. This would again trigger, the + "duplication fire" for these instructions(`d` and `a`), and so finally we + are left with the requirement that, we need to duplicate `i` in : + `b, d, a, c`. + + This example in short summarizes the intent of the function. + .. note:: Currently this would only take a single iname for duplicating. For multiple inames please use the same function multiple number of times. :arg within: a stack match as understood by - :func:`loopy.match.parse_stack_match`. + :func:`loopy.match.parse_stack_match`, preferably the one returned by + :func:`loopy.get_iname_duplication_options`. """ # Getting the relevant forward and backward dependencies. -- GitLab From cd2926fb335e56a5510319b36dc5e7d74c3de2f6 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 23 Jan 2018 17:02:32 -0600 Subject: [PATCH 13/17] Improved code readability --- loopy/transform/iname.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index f41aebdca..fa0cbcaad 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -954,6 +954,7 @@ def get_forward_and_rev_deps(knl, iname, within): if iname in elem_insn.within_inames: forw_rev_depends.add(elem_id) + # e is used to denote the insn_id's over the iterables # adding the forward dependencies for e in elem_insn.depends_on: # checking that it has already not been visited/scheduled @@ -961,6 +962,7 @@ def get_forward_and_rev_deps(knl, iname, within): q.append(e) has_or_will_visit.add(e) + # adding the reverse dependencies for e in find_reverse_dependencies(knl, frozenset([elem_id])): # checking that it has already not been visited/scheduled if e not in has_or_will_visit: -- GitLab From e77f4041896fb89016da643cd1190a760b8ad021 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 27 Jan 2018 15:36:03 -0600 Subject: [PATCH 14/17] Removed `write_cl` options for the kernel in the test. --- test/test_loopy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 9e57aa26e..05455237f 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1848,7 +1848,6 @@ def test_duplication_options_with_deps(ctx_factory): # Assert that after the duplication, everything works assert lp.has_schedulable_iname_nesting(fixed_knl) knl = fixed_knl - knl = lp.set_options(knl, "write_cl") kernel_args = {} kernel_args['a'] = a_mat_dev kernel_args['b'] = b_mat_dev -- GitLab From 476f28c562e2a42c37eda595fc078c75d97448dd Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 27 Jan 2018 16:43:19 -0600 Subject: [PATCH 15/17] Fixed logical errors in the program. Now heading to the part of mappers. --- loopy/transform/iname.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index fa0cbcaad..a9dd61991 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -862,6 +862,10 @@ def needs_duplication_with_deps(knl, iname, within): check between :func:`loop.duplicate_inames` and :func:`loopy.duplicate_inames_with_deps`. + .. note:: + Currently this function only supports for a single instruction and + iname. Please call this function multiple number of times, if it is + intended to be used for more than one instructions or inames. :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. @@ -917,6 +921,22 @@ def needs_duplication_with_deps(knl, iname, within): # all the reason needed for the expensive duplication have been negated, # hence we can go ahead and do free duplication + + # one last check is that the either forward dependencies of this + # instruction or reverse depndencies of this insn do not have `iname` in + # their `within_inames` + + forward_within = set() + for forw in insn.depends_on: + forward_within.update(knl.id_to_insn[forw].within_inames) + + rev_within = set() + for rev in insn.depends_on: + rev_within.update(knl.id_to_insn[rev].within_inames) + + if iname in rev_within & forward_within: + return True + return False -- GitLab From bfe2854af6012ef6e19309cde0d424d3388efe15 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 27 Jan 2018 19:29:06 -0600 Subject: [PATCH 16/17] Added a WalkMapper for the traversal --- loopy/transform/iname.py | 79 ++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index a9dd61991..da252720c 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -870,7 +870,6 @@ def needs_duplication_with_deps(knl, iname, within): :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. """ - from pymbolic.primitives import (Subscript, Variable) from loopy.match import parse_stack_match insns = parse_stack_match(within) @@ -879,52 +878,46 @@ def needs_duplication_with_deps(knl, iname, within): if insns(knl, insn, ()): break - # Handling the LHS - lhs = insn.assignee - if not isinstance(lhs, Subscript): - return True - if isinstance(lhs.index, Variable): - if Variable(iname) != lhs.index: - return True - else: - if Variable(iname) not in lhs.index: - return True + # Creating the class for checking whether the lhs and rhs are compliant + from pymbolic.mapper import WalkMapper + from pymbolic.primitives import (Variable, Subscript) - # The LHS is now compliant with free duplication - # analyzing the RHS now - rhs = insn.expression - from collections import deque - rhs_queue = deque() - rhs_queue.append(rhs) - - while len(rhs_queue) != 0: - elem = rhs_queue.pop() - if isinstance(elem, Variable): - # encountered non-indexed variable and hence needs dupl_with_deps - return True - elif isinstance(elem, Subscript): - if isinstance(elem.index, Variable): - if Variable(iname) != elem.index: - return True - else: - if not Variable(iname) in elem.index: + class InameChecker(WalkMapper): + result = False + + def map_subscript(self, expr, *args, **kwargs): + if not self.visit(expr, *args, **kwargs): + return + + for index in expr.index_tuple: + if not isinstance(index, Variable): + self.rec(index, *args, **kwargs) + + self.post_visit(expr, *args, **kwargs) + + def post_visit(self, expr): + if isinstance(expr, Variable): + self.result = True + elif isinstance(expr, Subscript): + if Variable(iname) not in expr.index_tuple: # iname not in indices needs dupl_with_deps - return True - elif 'children' in dir(elem): - # the element is also an expression - for child in elem.children: - rhs_queue.append(child) - else: - # these are the ints, floats in the expression and hence ignored - from numbers import Number - assert isinstance(elem, Number) + self.result = True + + iname_checker = InameChecker() + + # Getting LHS and RHS + lhs = insn.assignee + rhs = insn.expression - # all the reason needed for the expensive duplication have been negated, - # hence we can go ahead and do free duplication + # Checking the inames for LHS and RHS + iname_checker(lhs) + iname_checker(rhs) + + if iname_checker.result: + return True - # one last check is that the either forward dependencies of this - # instruction or reverse depndencies of this insn do not have `iname` in - # their `within_inames` + # one last check is that the either forward dependencies or reverse + # depndencies of this insn do not have `iname` in their `within_inames` forward_within = set() for forw in insn.depends_on: -- GitLab From e1aa983a98c4e009ebb273a16268b1f3907baf47 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 5 Feb 2018 23:40:39 -0600 Subject: [PATCH 17/17] Introduced the class definition outside, added assertion that only one instruction is needed for processing iname_duplication_with_deps and some doc improvements. --- loopy/transform/iname.py | 80 +++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index da252720c..38d955f03 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -31,8 +31,9 @@ from islpy import dim_type from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext) + SubstitutionRuleMappingContext, WalkMapper) from loopy.diagnostic import LoopyError +from pymbolic.primitives import (Variable, Subscript) __doc__ = """ @@ -853,13 +854,46 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # }}} +# {{{ Helping mappers for duplication_with_deps + + +class InameChecker(WalkMapper): + result = None + iname = None + + def __init__(self, _iname): + self.result = False + self.iname = _iname + + def map_subscript(self, expr, *args, **kwargs): + if not self.visit(expr, *args, **kwargs): + return + + for index in expr.index_tuple: + if not isinstance(index, Variable): + self.rec(index, *args, **kwargs) + + self.post_visit(expr, *args, **kwargs) + + def post_visit(self, expr): + if isinstance(expr, Variable): + self.result = True + elif isinstance(expr, Subscript): + if Variable(self.iname) not in expr.index_tuple: + # iname not in indices needs dupl_with_deps + self.result = True + +# }}} + # {{{ iname duplication with dependencies + + def needs_duplication_with_deps(knl, iname, within): """ - The :func:`lp.duplicate_inames` might fail when the instruction to be - duplicated has dependencies associated with it. This function acts as a - check between :func:`loop.duplicate_inames` and + The :func:`lp.duplicate_inames` might not reciprocate the logic when the + instruction to be duplicated has dependencies associated with it. This + function acts as a check between :func:`loop.duplicate_inames` and :func:`loopy.duplicate_inames_with_deps`. .. note:: @@ -871,39 +905,25 @@ def needs_duplication_with_deps(knl, iname, within): :func:`loopy.match.parse_stack_match`. """ from loopy.match import parse_stack_match - insns = parse_stack_match(within) + from loopy.match import Or + is_within_or = isinstance(within, Or) + + within = parse_stack_match(within) # Getting the instruction of interest + insn_list = [] for insn in knl.instructions: - if insns(knl, insn, ()): - break + if within(knl, insn, ()): + insn_list.append(insn) - # Creating the class for checking whether the lhs and rhs are compliant - from pymbolic.mapper import WalkMapper - from pymbolic.primitives import (Variable, Subscript) + assert len(insn_list) == 1 or is_within_or, ('The function can only handle' + 'one instruction currently, please give input accordingly') - class InameChecker(WalkMapper): - result = False + insn = insn_list[0] - def map_subscript(self, expr, *args, **kwargs): - if not self.visit(expr, *args, **kwargs): - return - - for index in expr.index_tuple: - if not isinstance(index, Variable): - self.rec(index, *args, **kwargs) - - self.post_visit(expr, *args, **kwargs) - - def post_visit(self, expr): - if isinstance(expr, Variable): - self.result = True - elif isinstance(expr, Subscript): - if Variable(iname) not in expr.index_tuple: - # iname not in indices needs dupl_with_deps - self.result = True + # Creating the class for checking whether the lhs and rhs are compliant - iname_checker = InameChecker() + iname_checker = InameChecker(iname) # Getting LHS and RHS lhs = insn.assignee -- GitLab