From 3d5ac2498e5e92a800d112347d52b0a26d180072 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 16:06:05 -0500 Subject: [PATCH 01/10] Fix test_kernel_splitting_with_loop_and_private_temporary() (Code was not assigning iname dependencies to the newly added load / spill instructions.) --- loopy/schedule/device_mapping.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 77a07f000..924e6942f 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -564,7 +564,9 @@ def restore_and_save_temporaries(kernel): args = reversed(args) from loopy.kernel.data import Assignment - new_insn = Assignment(*args, id=insn_id) + new_insn = Assignment(*args, id=insn_id, + forced_iname_deps=frozenset(hw_inames + dim_inames), + forced_iname_deps_is_final=True) new_instructions.append(new_insn) -- GitLab From 0e6f47fa86b01191159fdfe1c14d65c3d869c45f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 17:37:15 -0500 Subject: [PATCH 02/10] Add support for local temporary spills / loads. 
--- loopy/schedule/device_mapping.py | 38 +++++++++++++++++++++++++----- loopy/target/pyopencl.py | 4 +++- test/test_loopy.py | 40 ++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 924e6942f..3c98c4edc 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -93,6 +93,20 @@ def get_common_hw_inames(kernel, insn_ids): set.intersection, (get_hw_inames(kernel, id_to_insn[id]) for id in insn_ids)) + +def remove_illegal_loops_for_hw_tagged_inames_in_schedule(kernel): + from loopy.kernel.data import HardwareParallelTag + new_schedule = [] + + for item in kernel.schedule: + if isinstance(item, (EnterLoop, LeaveLoop)): + tag = kernel.iname_to_tag.get(item.iname) + if isinstance(tag, HardwareParallelTag): + continue + new_schedule.append(item) + + return kernel.copy(schedule=new_schedule) + # }}} @@ -403,11 +417,15 @@ def augment_domain_for_temporary_promotion( new_iname = name_gen("{name}_{mode}_dim_{dim}". format(name=orig_temporary.name, mode=mode, - dim=orig_dim + t_idx)) + dim=t_idx)) domain = domain.set_dim_name( isl.dim_type.set, orig_dim + t_idx, new_iname) - #from loopy.kernel.data import auto - #iname_to_tag[new_iname] = auto + if orig_temporary.is_local: + # If the temporary is tagged local, then loads / stores can be done + # in parallel. + from loopy.kernel.data import AutoFitLocalIndexTag + iname_to_tag[new_iname] = AutoFitLocalIndexTag() + dim_inames.append(new_iname) # Add size information. 
@@ -614,17 +632,25 @@ def restore_and_save_temporaries(kernel): # }}} new_iname_to_tag.update(kernel.iname_to_tag) - new_temporary_variables = dict( + updated_temporary_variables = dict( (t.name, t.as_variable()) for t in new_temporaries.values()) - new_temporary_variables.update(kernel.temporary_variables) + updated_temporary_variables.update(kernel.temporary_variables) kernel = kernel.copy( iname_to_tag=new_iname_to_tag, - temporary_variables=new_temporary_variables, + temporary_variables=updated_temporary_variables, instructions=kernel.instructions + new_instructions, schedule=new_schedule ) + from loopy.kernel.tools import assign_automatic_axes + kernel = assign_automatic_axes(kernel) + + # Once assign_automatic_axes() does its job, loops in the schedule + # for newly hardware-tagged inames are no longer necessary (and in + # fact illegal), so remove them. + kernel = remove_illegal_loops_for_hw_tagged_inames_in_schedule(kernel) + return kernel diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index b570cf4ab..428f333f6 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -615,7 +615,9 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): from genpy import Assign, Comment, Line def alloc_nbytes(tv): - return tv.dtype.numpy_dtype.itemsize + from six.moves import reduce + from operator import mul + return tv.dtype.numpy_dtype.itemsize * reduce(mul, tv.shape, 1) from loopy.kernel.data import temp_var_scope diff --git a/test/test_loopy.py b/test/test_loopy.py index 26f4f2faf..4b6dc898f 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1136,6 +1136,46 @@ def test_kernel_splitting_with_loop_and_private_temporary(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) +def test_kernel_splitting_with_loop_and_local_temporary(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{ [i,k]: 0<=i<n and 0<=k<3 }", + """ + <> t_local[i % 8,k] = i % 8 + c[k,i] = a[k,i+1] + out[k,i] = c[k,i] + t_local[i % 8,k] + """) + + knl = 
lp.add_and_infer_dtypes(knl, + {"a": np.float32, "c": np.float32, "out": np.float32, "n": np.int32}) + + knl = lp.set_temporary_scope(knl, "t_local", "local") + + ref_knl = knl + + knl = lp.split_iname(knl, "i", 8, outer_tag="g.0", inner_tag="l.0") + + # schedule + from loopy.preprocess import preprocess_kernel + knl = preprocess_kernel(knl) + + from loopy.schedule import get_one_scheduled_kernel + knl = get_one_scheduled_kernel(knl) + + # map schedule onto host or device + print(knl) + + cgr = lp.generate_code_v2(knl) + + assert len(cgr.device_programs) == 2 + + print(cgr.device_code()) + print(cgr.host_code()) + + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=8)) + + def test_global_temporary(ctx_factory): ctx = ctx_factory() -- GitLab From 21e91413cb8e4263212bb4aefae511eeb8351a67 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 18:03:17 -0500 Subject: [PATCH 03/10] Fix off-by-one error (yikes). --- loopy/schedule/device_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 3c98c4edc..13d45503e 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -433,7 +433,7 @@ def augment_domain_for_temporary_promotion( domain &= aff[0].le_set(aff[new_iname]) size = orig_temporary.shape[t_idx] from loopy.symbolic import aff_from_expr - domain &= aff[new_iname].le_set(aff_from_expr(domain.space, size)) + domain &= aff[new_iname].lt_set(aff_from_expr(domain.space, size)) hw_inames = [] -- GitLab From 0386f58489e3bc93fb492a603d4debf8f78fa87a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 20:43:28 -0500 Subject: [PATCH 04/10] Device mapper: Add a comment describing the current temporary variable promotion policy in detail. 
--- loopy/schedule/device_mapping.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 924e6942f..9e05afcd9 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -325,6 +325,10 @@ class PromotedTemporary(Record): def determine_temporaries_to_promote(kernel, temporaries, name_gen): """ + For each temporary in the passed list of temporaries, construct a + :class:`PromotedTemporary` which describes how the temporary should + get promoted into global storage. + :returns: A :class:`dict` mapping temporary names from `temporaries` to :class:`PromotedTemporary` objects """ @@ -343,6 +347,18 @@ def determine_temporaries_to_promote(kernel, temporaries, name_gen): assert temporary.base_storage is None, \ "Cannot promote temporaries with base_storage to global" + # `hw_inames`: The set of hw-parallel tagged inames that this temporary + # is associated with. This is used for determining the shape of the + # global storage needed for saving and restoring the temporary across + # kernel calls. + # + # TODO: Make a policy decision about which dimensions to use. Currently, + # the code looks at each instruction that defines or uses the temporary, + # and takes the common set of hw-parallel tagged inames associated with + # these instructions. + # + # Furthermore, in the case of local temporaries, inames that are tagged + # hw-local do not contribute to the global storage shape. hw_inames = get_common_hw_inames(kernel, def_lists[temporary.name] + use_lists[temporary.name]) @@ -350,6 +366,8 @@ def determine_temporaries_to_promote(kernel, temporaries, name_gen): hw_inames = sorted(hw_inames, key=lambda iname: str(kernel.iname_to_tag[iname])) + # Calculate the sizes of the dimensions that get added in front for + # the global storage of the temporary. 
shape_prefix = [] backing_hw_inames = [] -- GitLab From e816facf92f4a924e8bb9fe32dc6021ea4a201c6 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 20:46:41 -0500 Subject: [PATCH 05/10] Fix typo. --- loopy/schedule/device_mapping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 9e05afcd9..b317135dd 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -260,7 +260,7 @@ def compute_live_temporaries(kernel, schedule): live_in[idx] = live_out[idx] = live_in[idx + 1] idx -= 1 else: - raise LoopyError("unexepcted type of schedule item: %s" + raise LoopyError("unexpected type of schedule item: %s" % type(sched_item).__name__) # }}} @@ -732,7 +732,7 @@ def map_schedule_onto_host_or_device_impl(kernel): current_chunk.append(sched_item) i += 1 else: - raise LoopyError("unexepcted type of schedule item: %s" + raise LoopyError("unexpected type of schedule item: %s" % type(sched_item).__name__) if current_chunk and schedule_required_splitting: -- GitLab From e64ec00044c6368c1eca6ab97b36a434e9a16a35 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 21:33:02 -0500 Subject: [PATCH 06/10] Fix domain changing bug that used previous versions of the kernel. 
--- loopy/schedule/device_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 997741719..e1b308cc4 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -566,7 +566,7 @@ def restore_and_save_temporaries(kernel): for tval in tvals: from loopy.kernel.tools import DomainChanger tval_hw_inames = new_temporaries[tval].hw_inames - dchg = DomainChanger(kernel, + dchg = DomainChanger(new_kernel, frozenset(sched_item.extra_inames + tval_hw_inames)) domain = dchg.domain -- GitLab From cfe1f9481597352adf66f4e9254c3601cbf260de Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 7 Jun 2016 21:36:14 -0500 Subject: [PATCH 07/10] Test save/restore of private arrays. --- test/test_loopy.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 4b6dc898f..5a2243fe6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1104,13 +1104,16 @@ def test_kernel_splitting_with_loop_and_private_temporary(ctx_factory): knl = lp.make_kernel( "{ [i,k]: 0<=i<n and 0<=k<3 }", """ - <> t_private = a[k,i+1] + <> t_private_scalar = a[k,i+1] + <> t_private_array[i % 2] = a[k,i+1] c[k,i] = a[k,i+1] - out[k,i] = c[k,i] + t_private + out[k,i] = c[k,i] + t_private_scalar + t_private_array[i % 2] """) knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "c": np.float32, "out": np.float32, "n": np.int32}) + knl = lp.set_temporary_scope(knl, "t_private_scalar", "private") + knl = lp.set_temporary_scope(knl, "t_private_array", "private") ref_knl = knl -- GitLab From 6ab79dd2951836b4becbb5578983c1ff2b49ee06 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 8 Jun 2016 18:59:07 -0500 Subject: [PATCH 08/10] Fix spelling error. 
--- loopy/schedule/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 8b2a8190c..6064f5a2b 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1053,7 +1053,7 @@ class DependencyRecord(Record): def get_barrier_needing_dependency(kernel, target, source, reverse, var_kind): - """If there exists a depdency between target and source and the two access + """If there exists a dependency between target and source and the two access a common variable of *var_kind* in a way that requires a barrier (essentially, at least one write), then the function will return a tuple ``(target, source, var_name)``. Otherwise, it will return *None*. -- GitLab From a0d26fb39eb7e7a54084888d56ca034fd17ff889 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 9 Jun 2016 11:16:00 -0500 Subject: [PATCH 09/10] Fix comment. --- loopy/schedule/device_mapping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index e1b308cc4..c393fe7c2 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -439,8 +439,8 @@ def augment_domain_for_temporary_promotion( domain = domain.set_dim_name( isl.dim_type.set, orig_dim + t_idx, new_iname) if orig_temporary.is_local: - # If the temporary is tagged local, then loads / stores can be done - # in parallel. + # If the temporary has local scope, then loads / stores can be + # done in parallel. from loopy.kernel.data import AutoFitLocalIndexTag iname_to_tag[new_iname] = AutoFitLocalIndexTag() -- GitLab From bc324c37392a7b87aa4bf6fac469156cf29111eb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 9 Jun 2016 11:18:15 -0500 Subject: [PATCH 10/10] Fix reduce() silliness. 
--- loopy/schedule/device_mapping.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index c393fe7c2..4bbf99d57 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -87,11 +87,8 @@ def get_common_hw_inames(kernel, insn_ids): # Get the list of hardware inames in which the temporary is defined. if len(insn_ids) == 0: return set() - id_to_insn = kernel.id_to_insn - from six.moves import reduce - return reduce( - set.intersection, - (get_hw_inames(kernel, id_to_insn[id]) for id in insn_ids)) + return set.intersection( + *(get_hw_inames(kernel, kernel.id_to_insn[id]) for id in insn_ids)) def remove_illegal_loops_for_hw_tagged_inames_in_schedule(kernel): -- GitLab