From b79a7909bd8f5125d921e5d586ffb751a3115bc8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 1 Mar 2020 21:41:58 -0600 Subject: [PATCH 1/8] rename+deprecate get_one_scheduled_kernel()->get_one_linearized_kernel() --- doc/ref_transform.rst | 2 +- doc/tutorial.rst | 10 +++---- examples/python/global_barrier_removal.py | 4 +-- examples/python/ispc-stream-harness.py | 2 +- loopy/__init__.py | 4 +-- loopy/codegen/__init__.py | 4 +-- loopy/kernel/tools.py | 4 +-- loopy/schedule/__init__.py | 9 +++++++ loopy/statistics.py | 2 +- loopy/target/execution.py | 4 +-- test/test_loopy.py | 32 +++++++++++------------ test/test_target.py | 8 +++--- test/test_transform.py | 2 +- 13 files changed, 48 insertions(+), 39 deletions(-) diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index 740c5cb58..57d33b539 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -118,7 +118,7 @@ Finishing up .. autofunction:: generate_loop_schedules -.. autofunction:: get_one_scheduled_kernel +.. autofunction:: get_one_linearized_kernel .. autofunction:: save_and_reload_temporaries diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 753b09b5d..67ca22bfa 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1204,9 +1204,9 @@ Here is what happens when we try to generate code for the kernel: This happens due to the kernel splitting done by :mod:`loopy`. The splitting happens when the instruction schedule is generated. To see the schedule, we -should call :func:`loopy.get_one_scheduled_kernel`: +should call :func:`loopy.get_one_linearized_kernel`: - >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 @@ -1233,12 +1233,12 @@ goes for local temporaries). :func:`loopy.save_and_reload_temporaries` for the purpose of handling the task of saving and restoring temporary values across global barriers. This function adds instructions to the kernel without scheduling them. That means -that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to +that :func:`loopy.get_one_linearized_kernel` needs to be called one more time to put those instructions into the schedule. - >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) >>> knl = lp.save_and_reload_temporaries(knl) - >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added instructions + >>> knl = lp.get_one_linearized_kernel(knl) # Schedule added instructions >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index 7ab049cd1..ce37501e8 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -23,8 +23,8 @@ knl = lp.add_and_infer_dtypes(knl, from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) -from loopy.schedule import get_one_scheduled_kernel -knl = get_one_scheduled_kernel(knl) +from loopy.schedule import get_one_linearized_kernel +knl = get_one_linearized_kernel(knl) # map schedule onto host or device print(knl) diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index fa581d426..34636fdd9 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -30,7 +30,7 @@ def transform(knl, vars, stream_dtype): def gen_code(knl): knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) codegen_result = lp.generate_code_v2(knl) return codegen_result.device_code() + "\n" + codegen_result.host_code() diff --git a/loopy/__init__.py b/loopy/__init__.py index b60de6e2d..b227bd1dc 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ from loopy.transform.add_barrier import add_barrier from loopy.type_inference import infer_unknown_types from loopy.preprocess import preprocess_kernel, realize_reduction -from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel +from loopy.schedule import generate_loop_schedules, get_one_linearized_kernel from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_poly, get_op_map, get_lmem_access_poly, get_DRAM_access_poly, get_gmem_access_poly, get_mem_access_map, @@ -248,7 +248,7 @@ __all__ = [ "infer_unknown_types", "preprocess_kernel", "realize_reduction", - "generate_loop_schedules", "get_one_scheduled_kernel", + "generate_loop_schedules", "get_one_linearized_kernel", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 11f874e1b..a6d57ee41 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -385,8 +385,8 @@ def generate_code_v2(kernel): kernel = preprocess_kernel(kernel) if kernel.schedule is None: - from loopy.schedule import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) + from loopy.schedule import get_one_linearized_kernel + kernel = get_one_linearized_kernel(kernel) if kernel.state != KernelState.SCHEDULED: raise LoopyError("cannot generate code for a kernel that has not been " diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index bb6ae44c9..40e942261 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -465,8 +465,8 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): if iname_cluster and not kernel.schedule: try: - from loopy.schedule import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) + from loopy.schedule import get_one_linearized_kernel + kernel = get_one_linearized_kernel(kernel) except RuntimeError as e: iname_cluster = False from warnings import warn diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index f145c7122..0983c5e0d 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -2029,6 +2029,15 @@ def _get_one_scheduled_kernel_inner(kernel): def get_one_scheduled_kernel(kernel): + warn_with_kernel( + kernel, "get_one_scheduled_kernel_deprecated", + "get_one_scheduled_kernel is deprecated. " + "Use get_one_linearized_kernel instead.", + DeprecationWarning) + return get_one_linearized_kernel(kernel) + + +def get_one_linearized_kernel(kernel): from loopy import CACHING_ENABLED sched_cache_key = kernel diff --git a/loopy/statistics.py b/loopy/statistics.py index 10d29daad..f0b87d2e0 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1723,7 +1723,7 @@ def get_synchronization_map(knl, subgroup_size=None): from operator import mul knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) iname_list = [] result = ToCountMap() diff --git a/loopy/target/execution.py b/loopy/target/execution.py index c8f0d4090..07c324f24 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -758,8 +758,8 @@ class KernelExecutorBase(object): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) - from loopy.schedule import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) + from loopy.schedule import get_one_linearized_kernel + kernel = get_one_linearized_kernel(kernel) return kernel diff --git a/test/test_loopy.py b/test/test_loopy.py index 6b78ac26b..f512b026a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1065,8 +1065,8 @@ def test_kernel_splitting(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.schedule import get_one_scheduled_kernel - knl = get_one_scheduled_kernel(knl) + from loopy.schedule import get_one_linearized_kernel + knl = get_one_linearized_kernel(knl) # map schedule onto host or device print(knl) @@ -1106,8 +1106,8 @@ def test_kernel_splitting_with_loop(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.schedule import get_one_scheduled_kernel - knl = get_one_scheduled_kernel(knl) + from loopy.schedule import get_one_linearized_kernel + knl = get_one_linearized_kernel(knl) # map schedule onto host or device print(knl) @@ -1124,14 +1124,14 @@ def test_kernel_splitting_with_loop(ctx_factory): def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False): from loopy.preprocess import preprocess_kernel - from loopy.schedule import get_one_scheduled_kernel + from loopy.schedule import get_one_linearized_kernel knl = preprocess_kernel(knl) - knl = get_one_scheduled_kernel(knl) + knl = get_one_linearized_kernel(knl) from loopy.transform.save import save_and_reload_temporaries knl = save_and_reload_temporaries(knl) - knl = get_one_scheduled_kernel(knl) + knl = get_one_linearized_kernel(knl) if debug: print(knl) @@ -1395,7 +1395,7 @@ def test_save_ambiguous_storage_requirements(): knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): @@ -1752,7 +1752,7 @@ def test_missing_global_barrier(): from loopy.diagnostic import MissingBarrierError with pytest.raises(MissingBarrierError): - lp.get_one_scheduled_kernel(knl) + lp.get_one_linearized_kernel(knl) def test_index_cse(ctx_factory): @@ -1884,7 +1884,7 @@ def test_const_temp_with_initializer_not_saved(): seq_dependencies=True) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) knl = lp.save_and_reload_temporaries(knl) # This ensures no save slot was added. @@ -2089,7 +2089,7 @@ def test_unscheduled_insn_detection(): """, "...") - knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) insn1, = lp.find_instructions(knl, "id:insn1") knl.instructions.append(insn1.copy(id="insn2")) @@ -2254,7 +2254,7 @@ def test_barrier_insertion_near_top_of_loop(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.set_temporary_scope(knl, "b", "local") - knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) print(knl) @@ -2281,7 +2281,7 @@ def test_barrier_insertion_near_bottom_of_loop(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.set_temporary_scope(knl, "b", "local") - knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) print(knl) @@ -2650,7 +2650,7 @@ def test_check_for_variable_access_ordering(): from loopy.diagnostic import VariableAccessNotOrdered with pytest.raises(VariableAccessNotOrdered): - lp.get_one_scheduled_kernel(knl) + lp.get_one_linearized_kernel(knl) def test_check_for_variable_access_ordering_with_aliasing(): @@ -2669,7 +2669,7 @@ def test_check_for_variable_access_ordering_with_aliasing(): from loopy.diagnostic import VariableAccessNotOrdered with pytest.raises(VariableAccessNotOrdered): - lp.get_one_scheduled_kernel(knl) + lp.get_one_linearized_kernel(knl) @pytest.mark.parametrize(("second_index", "expect_barrier"), @@ -2692,7 +2692,7 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): knl = lp.tag_inames(knl, "i:l.0") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) assert barrier_between(knl, "first", "second") == expect_barrier diff --git a/test/test_target.py b/test/test_target.py index bcf85a340..e83d28154 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -73,7 +73,7 @@ def test_ispc_target(occa_mode=False): default_tag="l.auto") codegen_result = lp.generate_code_v2( - lp.get_one_scheduled_kernel( + lp.get_one_linearized_kernel( lp.preprocess_kernel(knl))) print(codegen_result.device_code()) @@ -99,7 +99,7 @@ def test_cuda_target(): print( lp.generate_code( - lp.get_one_scheduled_kernel( + lp.get_one_linearized_kernel( lp.preprocess_kernel(knl)))[0]) @@ -142,7 +142,7 @@ def test_generate_c_snippet(): knl = lp.prioritize_loops(knl, "I,k_outer,k_inner") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) print(lp.generate_body(knl)) @@ -354,7 +354,7 @@ def test_ispc_streaming_stores(): knl = lp.set_argument_order(knl, vars + ["n"]) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) lp.generate_code_v2(knl).all_code() diff --git a/test/test_transform.py b/test/test_transform.py index 6eb6697b5..9f3358f13 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -266,7 +266,7 @@ def test_vectorize(ctx_factory): knl = lp.tag_inames(knl, {"i_inner": "vec"}) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) code, inf = lp.generate_code(knl) lp.auto_test_vs_ref( -- GitLab From 1356dd65bda1a098bbd576352b257a3f951f6fb2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 1 Mar 2020 22:05:15 -0600 Subject: [PATCH 2/8] rename+deprecate KernelState.SCHEDULED->KernelState.LINEARIZED (tests still failing) --- loopy/kernel/__init__.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9096edcc0..31aff6d49 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -101,7 +101,30 @@ class _UniqueVarNameGenerator(UniqueNameGenerator): class KernelState: # noqa INITIAL = 0 PREPROCESSED = 1 - SCHEDULED = 2 + LINEARIZED = 2 + + @property + def SCHEDULED(self): + warn( + "Use of 'KernelState.SCHEDULED' is deprecated, " + "use 'KernelState.LINEARIZED' instead.", + DeprecationWarning, stacklevel=2) + + return self.LINEARIZED + +''' + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.INITIAL) + key_builder.rec(key_hash, self.PREPROCESSED) + key_builder.rec(key_hash, self.LINEARIZED) + + def __hash__(self): + return hash(repr(self)) +''' # {{{ kernel_state, KernelState compataibility -- GitLab From 35c56fd76c3ea92969c7ef4fec08b4f7af1386fd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 1 Mar 2020 22:52:46 -0600 Subject: [PATCH 3/8] changed @property to custom decorator to avoid hashing error when caching --- loopy/kernel/__init__.py | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 31aff6d49..55b8ff6f3 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -98,33 +98,25 @@ class _UniqueVarNameGenerator(UniqueNameGenerator): # {{{ loop kernel object -class KernelState: # noqa - INITIAL = 0 - PREPROCESSED = 1 - LINEARIZED = 2 +class _deprecated_KernelState_SCHEDULED(object): # noqa + def __init__(self, f): + self.f = f - @property - def SCHEDULED(self): + def __get__(self, obj, klass): warn( - "Use of 'KernelState.SCHEDULED' is deprecated, " - "use 'KernelState.LINEARIZED' instead.", + "'KernelState.SCHEDULE' is deprecated. " + "Use 'KernelState.LINEARIZED'.", DeprecationWarning, stacklevel=2) + return self.f() - return self.LINEARIZED - -''' - def update_persistent_hash(self, key_hash, key_builder): - """Custom hash computation function for use with - :class:`pytools.persistent_dict.PersistentDict`. - """ - - key_builder.rec(key_hash, self.INITIAL) - key_builder.rec(key_hash, self.PREPROCESSED) - key_builder.rec(key_hash, self.LINEARIZED) +class KernelState: # noqa + INITIAL = 0 + PREPROCESSED = 1 + LINEARIZED = 2 - def __hash__(self): - return hash(repr(self)) -''' + @_deprecated_KernelState_SCHEDULED + def SCHEDULED(): # pylint:disable=no-method-argument + return KernelState.LINEARIZED # {{{ kernel_state, KernelState compataibility -- GitLab From 44c33803d6bff1029fcda6f5816a11123a9c664e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 1 Mar 2020 23:24:24 -0600 Subject: [PATCH 4/8] in LoopKernel.__init__, handle linearization variable that doesn't yet exist --- loopy/kernel/__init__.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 55b8ff6f3..e7e6aae6a 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -242,7 +242,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ constructor - def __init__(self, domains, instructions, args=None, schedule=None, + def __init__(self, domains, instructions, args=None, + schedule=None, + linearization=None, name="loopy_kernel", preambles=None, preamble_generators=None, @@ -351,6 +353,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): ]: raise ValueError("invalid value for 'state'") + # `linearization` is replacing `schedule`, but we're not changing + # this under the hood yet, so for now, store it inside `schedule` + # and raise deprecation warning anyway + if schedule is not None: + if linearization is not None: + # these should not both be present + raise ValueError( + "received both `schedule` and `linearization` args, " + "'LoopKernel.schedule' is deprecated. " + "Use 'LoopKernel.linearization'.") + warn( + "'LoopKernel.schedule' is deprecated. " + "Use 'LoopKernel.linearization'.", + DeprecationWarning, stacklevel=2) + elif linearization is not None: + schedule = linearization + from collections import defaultdict assert not isinstance(iname_to_tags, defaultdict) @@ -1409,6 +1428,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): # }}} + # {{{ handle linearization variable that doesn't yet exist + + @property + def linearization(self): + return self.schedule + + # }}} + # {{{ direct execution def __call__(self, *args, **kwargs): -- GitLab From 9fb7045bbee59c4e250d7c8bd9e4ff2a7f463f14 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 1 Mar 2020 23:31:52 -0600 Subject: [PATCH 5/8] changing get_one_linearized_kernel back to get_one_scheduled_kernel in most places to minimize merge conflicts with other branches (renaming will occur later) --- examples/python/global_barrier_removal.py | 4 +-- examples/python/ispc-stream-harness.py | 2 +- loopy/__init__.py | 6 +++-- loopy/codegen/__init__.py | 4 +-- loopy/kernel/tools.py | 4 +-- loopy/statistics.py | 2 +- loopy/target/execution.py | 4 +-- test/test_loopy.py | 32 +++++++++++------------ test/test_target.py | 8 +++--- test/test_transform.py | 2 +- 10 files changed, 35 insertions(+), 33 deletions(-) diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index ce37501e8..7ab049cd1 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -23,8 +23,8 @@ knl = lp.add_and_infer_dtypes(knl, from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) -from loopy.schedule import get_one_linearized_kernel -knl = get_one_linearized_kernel(knl) +from loopy.schedule import get_one_scheduled_kernel +knl = get_one_scheduled_kernel(knl) # map schedule onto host or device print(knl) diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index 34636fdd9..fa581d426 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -30,7 +30,7 @@ def transform(knl, vars, stream_dtype): def gen_code(knl): knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) codegen_result = lp.generate_code_v2(knl) return codegen_result.device_code() + "\n" + codegen_result.host_code() diff --git a/loopy/__init__.py b/loopy/__init__.py index b227bd1dc..67ad7de87 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,8 @@ from loopy.transform.add_barrier import add_barrier from loopy.type_inference import infer_unknown_types from loopy.preprocess import preprocess_kernel, realize_reduction -from loopy.schedule import generate_loop_schedules, get_one_linearized_kernel +from loopy.schedule import ( + generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel) from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_poly, get_op_map, get_lmem_access_poly, get_DRAM_access_poly, get_gmem_access_poly, get_mem_access_map, @@ -248,7 +249,8 @@ __all__ = [ "infer_unknown_types", "preprocess_kernel", "realize_reduction", - "generate_loop_schedules", "get_one_linearized_kernel", + "generate_loop_schedules", + "get_one_scheduled_kernel", "get_one_linearized_kernel", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index a6d57ee41..11f874e1b 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -385,8 +385,8 @@ def generate_code_v2(kernel): kernel = preprocess_kernel(kernel) if kernel.schedule is None: - from loopy.schedule import get_one_linearized_kernel - kernel = get_one_linearized_kernel(kernel) + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) if kernel.state != KernelState.SCHEDULED: raise LoopyError("cannot generate code for a kernel that has not been " diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 40e942261..bb6ae44c9 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -465,8 +465,8 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): if iname_cluster and not kernel.schedule: try: - from loopy.schedule import get_one_linearized_kernel - kernel = get_one_linearized_kernel(kernel) + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) except RuntimeError as e: iname_cluster = False from warnings import warn diff --git a/loopy/statistics.py b/loopy/statistics.py index f0b87d2e0..10d29daad 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1723,7 +1723,7 @@ def get_synchronization_map(knl, subgroup_size=None): from operator import mul knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) iname_list = [] result = ToCountMap() diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 07c324f24..c8f0d4090 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -758,8 +758,8 @@ class KernelExecutorBase(object): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) - from loopy.schedule import get_one_linearized_kernel - kernel = get_one_linearized_kernel(kernel) + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) return kernel diff --git a/test/test_loopy.py b/test/test_loopy.py index f512b026a..6b78ac26b 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1065,8 +1065,8 @@ def test_kernel_splitting(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.schedule import get_one_linearized_kernel - knl = get_one_linearized_kernel(knl) + from loopy.schedule import get_one_scheduled_kernel + knl = get_one_scheduled_kernel(knl) # map schedule onto host or device print(knl) @@ -1106,8 +1106,8 @@ def test_kernel_splitting_with_loop(ctx_factory): from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) - from loopy.schedule import get_one_linearized_kernel - knl = get_one_linearized_kernel(knl) + from loopy.schedule import get_one_scheduled_kernel + knl = get_one_scheduled_kernel(knl) # map schedule onto host or device print(knl) @@ -1124,14 +1124,14 @@ def test_kernel_splitting_with_loop(ctx_factory): def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False): from loopy.preprocess import preprocess_kernel - from loopy.schedule import get_one_linearized_kernel + from loopy.schedule import get_one_scheduled_kernel knl = preprocess_kernel(knl) - knl = get_one_linearized_kernel(knl) + knl = get_one_scheduled_kernel(knl) from loopy.transform.save import save_and_reload_temporaries knl = save_and_reload_temporaries(knl) - knl = get_one_linearized_kernel(knl) + knl = get_one_scheduled_kernel(knl) if debug: print(knl) @@ -1395,7 +1395,7 @@ def test_save_ambiguous_storage_requirements(): knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): @@ -1752,7 +1752,7 @@ def test_missing_global_barrier(): from loopy.diagnostic import MissingBarrierError with pytest.raises(MissingBarrierError): - lp.get_one_linearized_kernel(knl) + lp.get_one_scheduled_kernel(knl) def test_index_cse(ctx_factory): @@ -1884,7 +1884,7 @@ def test_const_temp_with_initializer_not_saved(): seq_dependencies=True) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) knl = lp.save_and_reload_temporaries(knl) # This ensures no save slot was added. @@ -2089,7 +2089,7 @@ def test_unscheduled_insn_detection(): """, "...") - knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) insn1, = lp.find_instructions(knl, "id:insn1") knl.instructions.append(insn1.copy(id="insn2")) @@ -2254,7 +2254,7 @@ def test_barrier_insertion_near_top_of_loop(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.set_temporary_scope(knl, "b", "local") - knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) print(knl) @@ -2281,7 +2281,7 @@ def test_barrier_insertion_near_bottom_of_loop(): knl = lp.tag_inames(knl, dict(i="l.0")) knl = lp.set_temporary_scope(knl, "a", "local") knl = lp.set_temporary_scope(knl, "b", "local") - knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) print(knl) @@ -2650,7 +2650,7 @@ def test_check_for_variable_access_ordering(): from loopy.diagnostic import VariableAccessNotOrdered with pytest.raises(VariableAccessNotOrdered): - lp.get_one_linearized_kernel(knl) + lp.get_one_scheduled_kernel(knl) def test_check_for_variable_access_ordering_with_aliasing(): @@ -2669,7 +2669,7 @@ def test_check_for_variable_access_ordering_with_aliasing(): from loopy.diagnostic import VariableAccessNotOrdered with pytest.raises(VariableAccessNotOrdered): - lp.get_one_linearized_kernel(knl) + lp.get_one_scheduled_kernel(knl) @pytest.mark.parametrize(("second_index", "expect_barrier"), @@ -2692,7 +2692,7 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): knl = lp.tag_inames(knl, "i:l.0") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) assert barrier_between(knl, "first", "second") == expect_barrier diff --git a/test/test_target.py b/test/test_target.py index e83d28154..bcf85a340 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -73,7 +73,7 @@ def test_ispc_target(occa_mode=False): default_tag="l.auto") codegen_result = lp.generate_code_v2( - lp.get_one_linearized_kernel( + lp.get_one_scheduled_kernel( lp.preprocess_kernel(knl))) print(codegen_result.device_code()) @@ -99,7 +99,7 @@ def test_cuda_target(): print( lp.generate_code( - lp.get_one_linearized_kernel( + lp.get_one_scheduled_kernel( lp.preprocess_kernel(knl)))[0]) @@ -142,7 +142,7 @@ def test_generate_c_snippet(): knl = lp.prioritize_loops(knl, "I,k_outer,k_inner") knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) print(lp.generate_body(knl)) @@ -354,7 +354,7 @@ def test_ispc_streaming_stores(): knl = lp.set_argument_order(knl, vars + ["n"]) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) lp.generate_code_v2(knl).all_code() diff --git a/test/test_transform.py b/test/test_transform.py index 9f3358f13..6eb6697b5 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -266,7 +266,7 @@ def test_vectorize(ctx_factory): knl = lp.tag_inames(knl, {"i_inner": "vec"}) knl = lp.preprocess_kernel(knl) - knl = lp.get_one_linearized_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) code, inf = lp.generate_code(knl) lp.auto_test_vs_ref( -- GitLab From a371a8cf21d7f90f2083f6e3efc2f8a2952814ab Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 3 Mar 2020 01:36:54 -0600 Subject: [PATCH 6/8] fixed typo KernelState.SCHEDULE->KernelState.SCHEDULED --- loopy/kernel/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index e7e6aae6a..5778de243 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -104,7 +104,7 @@ class _deprecated_KernelState_SCHEDULED(object): # noqa def __get__(self, obj, klass): warn( - "'KernelState.SCHEDULE' is deprecated. " + "'KernelState.SCHEDULED' is deprecated. " "Use 'KernelState.LINEARIZED'.", DeprecationWarning, stacklevel=2) return self.f() -- GitLab From a265a45aa34036fd3aee9b5374f11727cb96ff5f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 3 Mar 2020 01:39:42 -0600 Subject: [PATCH 7/8] in LoopKernel string, print LINEARIZATION instead of SCHEDULE --- loopy/kernel/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 5778de243..c3cd1738d 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1378,7 +1378,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if "schedule" in what and kernel.schedule is not None: lines.extend(sep) if show_labels: - lines.append("SCHEDULE:") + lines.append("LINEARIZATION:") from loopy.schedule import dump_schedule lines.append(dump_schedule(kernel, kernel.schedule)) -- GitLab From bf21e0dba13818d3ca2320572bd2a89f7d0167eb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 3 Mar 2020 08:15:31 -0600 Subject: [PATCH 8/8] change SCHEDULE->LINEARIZATION in doctests --- doc/tutorial.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 67ca22bfa..1b017f701 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1213,7 +1213,7 @@ should call :func:`loopy.get_one_linearized_kernel`: --------------------------------------------------------------------------- ... --------------------------------------------------------------------------- - SCHEDULE: + LINEARIZATION: 0: CALL KERNEL rotate_v2(extra_args=[], extra_inames=[]) 1: tmp = arr[i_inner + i_outer*16] {id=maketmp} 2: RETURN FROM KERNEL rotate_v2 @@ -1251,7 +1251,7 @@ put those instructions into the schedule. --------------------------------------------------------------------------- ... --------------------------------------------------------------------------- - SCHEDULE: + LINEARIZATION: 0: CALL KERNEL rotate_v2(extra_args=['tmp_save_slot'], extra_inames=[]) 1: tmp = arr[i_inner + i_outer*16] {id=maketmp} 2: tmp_save_slot[tmp_save_hw_dim_0_rotate_v2, tmp_save_hw_dim_1_rotate_v2] = tmp {id=tmp.save} -- GitLab