From b6d6ef374bb1432955b1760dd3fd806ddcdec716 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 13:49:15 -0600 Subject: [PATCH 01/40] Add check for variable ordering and language versioning scheme --- doc/misc.rst | 4 +- doc/ref_creation.rst | 2 + examples/python/hello-loopy.py | 3 +- loopy/__init__.py | 4 ++ loopy/check.py | 115 +++++++++++++++++++++++++++++++++ loopy/kernel/creation.py | 35 ++++++++++ loopy/options.py | 11 ++++ loopy/version.py | 52 +++++++++++++++ test/test_dg.py | 3 +- test/test_loopy.py | 13 ++-- 10 files changed, 235 insertions(+), 7 deletions(-) diff --git a/doc/misc.rst b/doc/misc.rst index cd6fe102c..2c9c9a92b 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -90,7 +90,9 @@ regarding OpenCL drivers. User-visible Changes ==================== -Version 2017.2 +See also :ref:`language-versioning`. + +Version 2018.1 -------------- .. note:: diff --git a/doc/ref_creation.rst b/doc/ref_creation.rst index 92eff09c9..6b715033c 100644 --- a/doc/ref_creation.rst +++ b/doc/ref_creation.rst @@ -30,4 +30,6 @@ To Copy between Data Formats .. autofunction:: make_copy_kernel +.. automodule:: loopy.version + .. vim: tw=75:spell:fdm=marker diff --git a/examples/python/hello-loopy.py b/examples/python/hello-loopy.py index 7c5de5a1b..e7ab13c16 100644 --- a/examples/python/hello-loopy.py +++ b/examples/python/hello-loopy.py @@ -15,7 +15,8 @@ a = cl.array.arange(queue, n, dtype=np.float32) # ------ knl = lp.make_kernel( "{ [i]: 0<=i= (2018, 1): + options = options.copy(enforce_check_variable_access_ordered=True) + if isinstance(silenced_warnings, str): silenced_warnings = silenced_warnings.split(";") diff --git a/loopy/options.py b/loopy/options.py index 13d0b752d..4277d999a 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -162,6 +162,14 @@ class Options(ImmutableRecord): .. rubric:: Features .. attribute:: disable_global_barriers + + .. 
attribute:: enforce_check_variable_access_ordered + + If *True*, require that + :func:`loopy.check.check_variable_access_ordered` passes. + Required for language versions 2018.1 and above. This check + helps find and eliminate unintentionally unordered access + to variables. """ _legacy_options_map = { @@ -216,6 +224,9 @@ class Options(ImmutableRecord): disable_global_barriers=kwargs.get("disable_global_barriers", False), check_dep_resolution=kwargs.get("check_dep_resolution", True), + + enforce_check_variable_access_ordered=kwargs.get( + "enforce_check_variable_access_ordered", False), ) # {{{ legacy compatibility diff --git a/loopy/version.py b/loopy/version.py index 7141a6782..21c920ce4 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -33,3 +33,55 @@ else: _islpy_version = islpy.version.VERSION_TEXT DATA_MODEL_VERSION = "v76-islpy%s" % _islpy_version + + +FALLBACK_LANGUAGE_VERSION = (2017, 2, 1) +MOST_RECENT_LANGUAGE_VERSION = (2018, 1) + +__doc__ = """ + +.. currentmodule:: loopy +.. data:: VERSION + + A tuple representing the current version number of loopy, for example + **(2017, 2, 1)**. Direct comparison of these tuples will always yield + valid version comparisons. + +.. _language-versioning: + +Loopy Language Versioning +------------------------- + +At version 2018.1, :mod:`loopy` introduced a language versioning scheme to make +it easier to evolve the language while retaining backward compatibility. What +prompted this is the addition of +:attr:`loopy.Options.enforce_check_variable_access_ordered`, which (despite +its name) serves to enable a new check that helps ensure that all variable +access in a kernel is ordered as intended. Since that has the potential to +break existing programs, kernels now have to declare support for a given +language version to let them take advantage of this check. + +As a result, :mod:`loopy` will now issue a warning when a call to +:func:`loopy.make_kernel` does not declare a language version. 
Such kernels will +(indefinitely) default to language version 2017.2.1. + +Language versions will generally reflect the version number of :mod:`loopy` in +which they were introduced, though it is possible that some versions of +:mod:`loopy` do not introduce new user-visible language features. In such +situations, the previous language version number remains. + + +.. data:: MOST_RECENT_LANGUAGE_VERSION + + A tuple representing the most recent language version number of loopy, for + example **(2018, 1)**. Direct comparison of these tuples will always + yield valid version comparisons. + +History of Language Versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``(2018, 1)``: :attr:`loopy.Options.enforce_check_variable_access_ordered` + is turned on by default. + +* ``(2017, 2, 1)``: Initial legacy language version. +""" diff --git a/test/test_dg.py b/test/test_dg.py index d65c68ed4..ef4a31373 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -72,7 +72,8 @@ def test_dg_volume(ctx_factory): order=order), lp.ValueArg("K", np.int32, approximately=1000), ], - name="dg_volume", assumptions="K>=1") + name="dg_volume", assumptions="K>=1", + lang_version=(2018, 1)) knl = lp.fix_parameters(knl, Np=Np) diff --git a/test/test_loopy.py b/test/test_loopy.py index e36a4c2c3..02002c5cd 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -67,7 +67,8 @@ def test_globals_decl_once_with_multi_subprogram(ctx_factory): [lp.TemporaryVariable( 'cnst', shape=('n'), initializer=cnst, scope=lp.temp_var_scope.GLOBAL, - read_only=True), '...']) + read_only=True), '...'], + lang_version=(2018, 1)) knl = lp.fix_parameters(knl, n=16) knl = lp.add_barrier(knl, "id:first", "id:second") @@ -88,7 +89,8 @@ def test_complicated_subst(ctx_factory): h(x) := 1 + g(x) + 20*g$two(x) a[i] = h$one(i) * h$two(i) - """) + """, + lang_version=(2018, 1)) knl = lp.expand_subst(knl, "... 
> id:h and tag:two > id:g and tag:two") @@ -119,7 +121,8 @@ def test_type_inference_no_artificial_doubles(ctx_factory): lp.GlobalArg("c", np.float32, shape=("n",)), lp.ValueArg("n", np.int32), ], - assumptions="n>=1") + assumptions="n>=1", + lang_version=(2018, 1)) knl = lp.preprocess_kernel(knl, ctx.devices[0]) for k in lp.generate_loop_schedules(knl): @@ -139,7 +142,9 @@ def test_type_inference_with_type_dependencies(): c = b + c <>d = b + 2 + 1j """, - "...") + "...", + lang_version=(2018, 1)) + knl = lp.infer_unknown_types(knl) from loopy.types import to_loopy_type -- GitLab From d02a9f511aaa401ccefa462de95ff0823836b16a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 17:25:40 -0600 Subject: [PATCH 02/40] check_variable_access_ordered: Only perform check if variable is written --- loopy/check.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index b4e117e25..fa74fd43b 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -447,6 +447,12 @@ def check_variable_access_ordered(kernel): depfind = IndirectDependencyEdgeFinder(kernel) for name in checked_variables: + readers = rmap.get(name, set()) + writers = wmap.get(name, set()) + + if not writers: + continue + if name in kernel.temporary_variables: scope = kernel.temporary_variables[name].scope else: @@ -456,13 +462,12 @@ def check_variable_access_ordered(kernel): elif isinstance(arg, ValueArg): scope = temp_var_scope.PRIVATE else: + # No need to consider ConstantArg and ImageArg (for now) + # because those won't be written. raise ValueError("could not determine scope of '%s'" % name) # Check even for PRIVATE scope, to ensure intentional program order. 
- readers = rmap.get(name, set()) - writers = wmap.get(name, set()) - for writer_id in writers: for other_id in readers | writers: if writer_id == other_id: -- GitLab From 6475c9fb99547ff1a820f92c680e7724a6f18831 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 17:46:05 -0600 Subject: [PATCH 03/40] Allow lang version setting with LOOPY_KERNEL_LANGUAGE_VERSION global in make_kernel caller's global space --- examples/python/hello-loopy.py | 6 ++-- loopy/frontend/fortran/translator.py | 2 ++ loopy/kernel/creation.py | 43 ++++++++++++++++++++-------- loopy/version.py | 8 ++++-- test/test_apps.py | 3 ++ test/test_c_execution.py | 3 ++ test/test_dg.py | 3 ++ test/test_diff.py | 3 ++ test/test_domain.py | 3 ++ test/test_linalg.py | 3 ++ test/test_loopy.py | 16 +++++------ test/test_misc.py | 3 ++ test/test_nbody.py | 3 ++ test/test_numa_diff.py | 3 ++ test/test_reduction.py | 3 ++ test/test_scan.py | 3 ++ test/test_sem_reagan.py | 3 ++ test/test_statistics.py | 3 ++ test/test_target.py | 3 ++ test/test_transform.py | 3 ++ 20 files changed, 95 insertions(+), 25 deletions(-) diff --git a/examples/python/hello-loopy.py b/examples/python/hello-loopy.py index e7ab13c16..da1273d2b 100644 --- a/examples/python/hello-loopy.py +++ b/examples/python/hello-loopy.py @@ -8,6 +8,9 @@ import pyopencl.array ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) +# for make_kernel calls from this file +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + n = 15 * 10**6 a = cl.array.arange(queue, n, dtype=np.float32) @@ -15,8 +18,7 @@ a = cl.array.arange(queue, n, dtype=np.float32) # ------ knl = lp.make_kernel( "{ [i]: 0<=i= (2018, 1): options = options.copy(enforce_check_variable_access_ordered=True) diff --git a/loopy/version.py b/loopy/version.py index 21c920ce4..49dca90fe 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -62,8 +62,12 @@ break existing programs, kernels now have to declare support for a given language version to let them take advantage of this 
check. As a result, :mod:`loopy` will now issue a warning when a call to -:func:`loopy.make_kernel` does not declare a language version. Such kernels will -(indefinitely) default to language version 2017.2.1. +:func:`loopy.make_kernel` does not declare a language version. Such kernels +will (indefinitely) default to language version 2017.2.1. If passing a +language version to :func:`make_kernel` is impractical, you may also place a +global variable ``LOOPY_KERNEL_LANGUAGE_VERSION`` in the global namespace of +the function calling :func:`make_kernel`. If *lang_version* is not explicitly +given, that its value will be used. Language versions will generally reflect the version number of :mod:`loopy` in which they were introduced, though it is possible that some versions of diff --git a/test/test_apps.py b/test/test_apps.py index c4844d3a3..55eecdf2b 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -49,6 +49,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + # {{{ convolutions def test_convolution(ctx_factory): diff --git a/test/test_c_execution.py b/test/test_c_execution.py index d1b3c95ca..582f3a105 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -40,6 +40,9 @@ else: faulthandler.enable() +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_c_target(): from loopy.target.c import ExecutableCTarget diff --git a/test/test_dg.py b/test/test_dg.py index ef4a31373..6688362aa 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -34,6 +34,9 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_dg_volume(ctx_factory): #logging.basicConfig(level=logging.DEBUG) diff --git a/test/test_diff.py b/test/test_diff.py index 95471f9b1..8a4fe9587 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -48,6 +48,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_diff(ctx_factory): ctx = ctx_factory() queue = 
cl.CommandQueue(ctx) diff --git a/test/test_domain.py b/test/test_domain.py index 9d0379a50..d8a83007a 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -52,6 +52,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_assume(ctx_factory): ctx = ctx_factory() diff --git a/test/test_linalg.py b/test/test_linalg.py index 3d422f1d8..7eba5facb 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -62,6 +62,9 @@ def check_float4(result, ref_result): ref_result[comp], result[comp], rtol=1e-3, atol=1e-3), None +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_axpy(ctx_factory): logging.basicConfig(level="INFO") ctx = ctx_factory() diff --git a/test/test_loopy.py b/test/test_loopy.py index 02002c5cd..d9bc3d271 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -52,6 +52,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_globals_decl_once_with_multi_subprogram(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -67,8 +70,7 @@ def test_globals_decl_once_with_multi_subprogram(ctx_factory): [lp.TemporaryVariable( 'cnst', shape=('n'), initializer=cnst, scope=lp.temp_var_scope.GLOBAL, - read_only=True), '...'], - lang_version=(2018, 1)) + read_only=True), '...']) knl = lp.fix_parameters(knl, n=16) knl = lp.add_barrier(knl, "id:first", "id:second") @@ -89,8 +91,7 @@ def test_complicated_subst(ctx_factory): h(x) := 1 + g(x) + 20*g$two(x) a[i] = h$one(i) * h$two(i) - """, - lang_version=(2018, 1)) + """) knl = lp.expand_subst(knl, "... 
> id:h and tag:two > id:g and tag:two") @@ -121,8 +122,7 @@ def test_type_inference_no_artificial_doubles(ctx_factory): lp.GlobalArg("c", np.float32, shape=("n",)), lp.ValueArg("n", np.int32), ], - assumptions="n>=1", - lang_version=(2018, 1)) + assumptions="n>=1") knl = lp.preprocess_kernel(knl, ctx.devices[0]) for k in lp.generate_loop_schedules(knl): @@ -142,9 +142,7 @@ def test_type_inference_with_type_dependencies(): c = b + c <>d = b + 2 + 1j """, - "...", - lang_version=(2018, 1)) - + "...") knl = lp.infer_unknown_types(knl) from loopy.types import to_loopy_type diff --git a/test/test_misc.py b/test/test_misc.py index 0273948b3..c1ae6c532 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -32,6 +32,9 @@ import logging logger = logging.getLogger(__name__) +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_compute_sccs(): from loopy.tools import compute_sccs import random diff --git a/test/test_nbody.py b/test/test_nbody.py index e118b04b9..f231dfd5b 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -34,6 +34,9 @@ import logging logger = logging.getLogger(__name__) +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_nbody(ctx_factory): logging.basicConfig(level=logging.INFO) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index eff3dbd0e..a5c69020a 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -44,6 +44,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + @pytest.mark.parametrize("Nq", [7]) @pytest.mark.parametrize("ilp_multiple", [1, 2]) @pytest.mark.parametrize("opt_level", [11]) diff --git a/test/test_reduction.py b/test/test_reduction.py index 909a800b2..1ddbbfebf 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -49,6 +49,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_nonsense_reduction(ctx_factory): ctx = ctx_factory() diff --git a/test/test_scan.py b/test/test_scan.py index 08754819c..228453fd8 100644 --- a/test/test_scan.py +++ 
b/test/test_scan.py @@ -56,6 +56,9 @@ __all__ = [ # - scan(a) + scan(b) # - test for badly tagged inames +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + @pytest.mark.parametrize("n", [1, 2, 3, 16]) @pytest.mark.parametrize("stride", [1, 2]) def test_sequential_scan(ctx_factory, n, stride): diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index 0571e4191..a92f2b2ab 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -31,6 +31,9 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_tim2d(ctx_factory): dtype = np.float32 ctx = ctx_factory() diff --git a/test/test_statistics.py b/test/test_statistics.py index eeb4a5a28..9427a4edf 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -34,6 +34,9 @@ import numpy as np from pymbolic.primitives import Variable +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_op_counter_basic(): knl = lp.make_kernel( diff --git a/test/test_target.py b/test/test_target.py index d3cf2670c..8fd565a81 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -52,6 +52,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_ispc_target(occa_mode=False): from loopy.target.ispc import ISPCTarget diff --git a/test/test_transform.py b/test/test_transform.py index 0e10db362..feb8bf9df 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -49,6 +49,9 @@ __all__ = [ ] +LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) + + def test_chunk_iname(ctx_factory): ctx = ctx_factory() -- GitLab From 5c090c40450121747c96ce8ea82b8fce2b4405c4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 17:51:50 -0600 Subject: [PATCH 04/40] Improve version warning message --- loopy/kernel/creation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index f43bf8490..91624f8c8 100644 --- 
a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1998,8 +1998,10 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): FALLBACK_LANGUAGE_VERSION) warn("'lang_version' was not passed to make_kernel(). " "To avoid this warning, pass " - "lang_version=%r in this invocation." - % (MOST_RECENT_LANGUAGE_VERSION,), + "lang_version={ver} in this invocation. " + "(Or set LOOPY_KERNEL_LANGUAGE_VERSION = {ver} in " + "the global scope of the calling frame.)" + .format(ver=MOST_RECENT_LANGUAGE_VERSION), LoopyWarning, stacklevel=2) lang_version = FALLBACK_LANGUAGE_VERSION -- GitLab From 61c76a71eb3bb2d76e68ead546e3deff6dfbec3d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 17:51:58 -0600 Subject: [PATCH 05/40] Doc typo fix --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 49dca90fe..74b27c862 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -67,7 +67,7 @@ will (indefinitely) default to language version 2017.2.1. If passing a language version to :func:`make_kernel` is impractical, you may also place a global variable ``LOOPY_KERNEL_LANGUAGE_VERSION`` in the global namespace of the function calling :func:`make_kernel`. If *lang_version* is not explicitly -given, that its value will be used. +given, this value will be used. 
Language versions will generally reflect the version number of :mod:`loopy` in which they were introduced, though it is possible that some versions of -- GitLab From 6041e91d6ce3b6ecbca5c191bd225f6e98c53b79 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 18:35:04 -0600 Subject: [PATCH 06/40] Discuss new semantics of no_sync_with --- loopy/kernel/instruction.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 9d95408ac..a2991c47b 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -99,13 +99,18 @@ class InstructionBase(ImmutableRecord): - `"global"` - `"any"`. - This indicates no barrier synchronization is necessary with the given + This indicates (symmetrically) that program semantics are not affected by + execution ordering of the involved instructions. In particular, + no barrier synchronization will be considered necessary with the given instruction using barriers of type `scope`, even given the existence of a dependency chain and apparently conflicting access. Note, that :attr:`no_sync_with` allows instruction matching through wildcards and match expression, just like :attr:`depends_on`. + This data is used specifically by barrier insertion and + :func:`loopy.check.enforce_variable_access_ordered`. + .. rubric:: Conditionals .. 
attribute:: predicates -- GitLab From bf7157bbf3f0c06f33eae431fb205525d1e00aea Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 6 Feb 2018 18:38:27 -0600 Subject: [PATCH 07/40] Naming/doc tweaks for check_variable_access_ordered --- loopy/check.py | 18 +++++++++++------- loopy/kernel/creation.py | 2 +- loopy/options.py | 6 +++--- loopy/version.py | 12 ++++++------ 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index fa74fd43b..ac9ca4463 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -431,8 +431,12 @@ def needs_no_sync_with(kernel, var_scope, dep_a_id, dep_b_id): def check_variable_access_ordered(kernel): - """Checks that all writes are ordered with respect to all other access to - the written variable. + """Checks that between each write to a variable and all other accesses to + the variable there is either: + + * an (at least indirect) depdendency edge, or + * an explicit statement that no ordering is necessary (expressed + through :attr:`loopy.Instruuction.no_sync_with`) """ checked_variables = ( kernel.get_written_variables() @@ -473,25 +477,25 @@ def check_variable_access_ordered(kernel): if writer_id == other_id: continue - has_ordering_relationship = ( + has_dependency_relationship = ( needs_no_sync_with(kernel, scope, other_id, writer_id) or depfind(writer_id, other_id) or depfind(other_id, writer_id)) - if not has_ordering_relationship: - msg = ("No ordering relationship found between " + if not has_dependency_relationship: + msg = ("No dependency relationship found between " "'{writer_id}' which writes '{var}' and " "'{other_id}' which also accesses '{var}'. " "Please either add a (possibly indirect) dependency " "between the two, or add one to the other's no_sync set " - "to indicate that no ordering is intended." + "to indicate that no ordering is intended. 
" .format( writer_id=writer_id, other_id=other_id, var=name)) - if kernel.options.enforce_check_variable_access_ordered: + if kernel.options.enforce_variable_access_ordered: raise LoopyError(msg) else: from loopy.diagnostic import warn_with_kernel diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 91624f8c8..36ab4f1dd 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -2007,7 +2007,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): lang_version = FALLBACK_LANGUAGE_VERSION if lang_version >= (2018, 1): - options = options.copy(enforce_check_variable_access_ordered=True) + options = options.copy(enforce_variable_access_ordered=True) if isinstance(silenced_warnings, str): silenced_warnings = silenced_warnings.split(";") diff --git a/loopy/options.py b/loopy/options.py index 4277d999a..f23d72816 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -163,7 +163,7 @@ class Options(ImmutableRecord): .. attribute:: disable_global_barriers - .. attribute:: enforce_check_variable_access_ordered + .. attribute:: enforce_variable_access_ordered If *True*, require that :func:`loopy.check.check_variable_access_ordered` passes. @@ -225,8 +225,8 @@ class Options(ImmutableRecord): False), check_dep_resolution=kwargs.get("check_dep_resolution", True), - enforce_check_variable_access_ordered=kwargs.get( - "enforce_check_variable_access_ordered", False), + enforce_variable_access_ordered=kwargs.get( + "enforce_variable_access_ordered", False), ) # {{{ legacy compatibility diff --git a/loopy/version.py b/loopy/version.py index 74b27c862..d3b020361 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -55,7 +55,7 @@ Loopy Language Versioning At version 2018.1, :mod:`loopy` introduced a language versioning scheme to make it easier to evolve the language while retaining backward compatibility. 
What prompted this is the addition of -:attr:`loopy.Options.enforce_check_variable_access_ordered`, which (despite +:attr:`loopy.Options.enforce_variable_access_ordered`, which (despite its name) serves to enable a new check that helps ensure that all variable access in a kernel is ordered as intended. Since that has the potential to break existing programs, kernels now have to declare support for a given @@ -70,10 +70,10 @@ the function calling :func:`make_kernel`. If *lang_version* is not explicitly given, this value will be used. Language versions will generally reflect the version number of :mod:`loopy` in -which they were introduced, though it is possible that some versions of -:mod:`loopy` do not introduce new user-visible language features. In such -situations, the previous language version number remains. - +which they were introduced, though it is likely that most versions of +:mod:`loopy` do not introduce language incompatibilities. In such +situations, the previous language version number remains. (In fact, we +will work hard to avoid backward-incompatible language changes.) .. data:: MOST_RECENT_LANGUAGE_VERSION @@ -84,7 +84,7 @@ situations, the previous language version number remains. History of Language Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* ``(2018, 1)``: :attr:`loopy.Options.enforce_check_variable_access_ordered` +* ``(2018, 1)``: :attr:`loopy.Options.enforce_variable_access_ordered` is turned on by default. * ``(2017, 2, 1)``: Initial legacy language version. 
-- GitLab From 8cf57ccf02231dfdc7ca051d25bdbe0d067a5d6a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 00:29:37 -0600 Subject: [PATCH 08/40] add_nosync: Better error reporting --- loopy/transform/instruction.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 37c5d85a1..676a3db66 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -264,6 +264,11 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): sinks = frozenset( sink.id for sink in find_instructions(kernel, sink)) + if not sources: + raise LoopyError("No match found for source specification '%s'." % source) + if not sinks: + raise LoopyError("No match found for sink specification '%s'." % sink) + def insns_in_conflicting_groups(insn1_id, insn2_id): insn1 = kernel.id_to_insn[insn1_id] insn2 = kernel.id_to_insn[insn2_id] @@ -275,11 +280,12 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): from collections import defaultdict nosync_to_add = defaultdict(set) + rec_dep_map = kernel.recursive_insn_dep_map() for sink in sinks: for source in sources: needs_nosync = force or ( - source in kernel.recursive_insn_dep_map()[sink] + source in rec_dep_map[sink] or insns_in_conflicting_groups(source, sink)) if not needs_nosync: @@ -289,6 +295,12 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): if bidirectional: nosync_to_add[source].add((sink, scope)) + if not nosync_to_add: + raise LoopyError("No nosync annotations were added as a result " + "of this call. add_nosync will (by default) only add them to " + "accompany existing depencies or group exclusions. 
Maybe you want " + "to pass force=True?") + new_instructions = list(kernel.instructions) for i, insn in enumerate(new_instructions): -- GitLab From d9393ca0b3fa3e6043b5a41c18fd1ddc29c61c69 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 00:30:02 -0600 Subject: [PATCH 09/40] Fix tests for stricter dep check --- test/test_apps.py | 5 +++-- test/test_loopy.py | 12 ++++++------ test/test_numa_diff.py | 3 ++- test/test_target.py | 6 ++++-- test/test_transform.py | 2 +- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/test/test_apps.py b/test/test_apps.py index 55eecdf2b..3be133d94 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -197,7 +197,7 @@ def test_rob_stroud_bernstein(ctx_factory): for alpha2 tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ - {id=write_tmp} + {id=write_tmp,dep=init_w:aind_init} w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ {id=update_w,dep=init_w:write_tmp} aind = aind + 1 \ @@ -491,7 +491,8 @@ def test_lbm(ctx_factory): f_new[i, j, 11] = + 0.25*m[8] - 0.125*m[10] - 0.25*m[11] end end - """) + """, + lang_version=(2017, 2, 1)) knl = lp.add_and_infer_dtypes(knl, {"f": np.float32}) diff --git a/test/test_loopy.py b/test/test_loopy.py index d9bc3d271..8229f613e 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1088,12 +1088,12 @@ def test_atomic_load(ctx_factory, dtype): "{ [i,j]: 0<=i,j upper = 0 - <> lower = 0 + <> upper = 0 {id=init_upper} + <> lower = 0 {id=init_lower} temp = 0 {id=init, atomic} for i - upper = upper + i * a[i] {id=sum0} - lower = lower - b[i] {id=sum1} + upper = upper + i * a[i] {id=sum0,dep=init_upper} + lower = lower - b[i] {id=sum1,dep=init_lower} end temp = temp + lower {id=temp_sum, dep=sum*:init, atomic,\ nosync=init} @@ -2632,8 +2632,8 @@ def test_fixed_parameters(ctx_factory): knl = lp.make_kernel( "[n] -> {[i]: 0 <= i < n}", """ - <>tmp[i] = i - tmp[0] = 0 + <>tmp[i] = i {id=init} + tmp[0] = 0 {dep=init} """, fixed_parameters=dict(n=1)) diff 
--git a/test/test_numa_diff.py b/test/test_numa_diff.py index a5c69020a..7bacad75f 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -60,13 +60,14 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa source = source.replace("datafloat", "real*4") hsv_r, hsv_s = [ - knl for knl in lp.parse_fortran(source, filename, auto_dependencies=False) + knl for knl in lp.parse_fortran(source, filename, seq_dependencies=False) if "KernelR" in knl.name or "KernelS" in knl.name ] hsv_r = lp.tag_instructions(hsv_r, "rknl") hsv_s = lp.tag_instructions(hsv_s, "sknl") hsv = lp.fuse_kernels([hsv_r, hsv_s], ["_r", "_s"]) #hsv = hsv_s + hsv = lp.add_nosync(hsv, "any", "writes:rhsQ", "writes:rhsQ", force=True) from gnuma_loopy_transforms import ( fix_euler_parameters, diff --git a/test/test_target.py b/test/test_target.py index 8fd565a81..71a2548c1 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -206,8 +206,8 @@ def test_random123(ctx_factory, tp): <> key2 = make_uint2(i, 324830944) {inames=i} <> key4 = make_uint4(i, 324830944, 234181, 2233) {inames=i} <> ctr = make_uint4(0, 1, 2, 3) {inames=i,id=init_ctr} - <> real, ctr = philox4x32_TYPE(ctr, key2) {dep=init_ctr} - <> imag, ctr = threefry4x32_TYPE(ctr, key4) {dep=init_ctr} + <> real, ctr = philox4x32_TYPE(ctr, key2) {id=realpart,dep=init_ctr} + <> imag, ctr = threefry4x32_TYPE(ctr, key4) {dep=init_ctr:realpart} out[i, 0] = real.s0 + 1j * imag.s0 out[i, 1] = real.s1 + 1j * imag.s1 @@ -215,6 +215,8 @@ def test_random123(ctx_factory, tp): out[i, 3] = real.s3 + 1j * imag.s3 """.replace("TYPE", tp)) + knl = lp.add_nosync(knl, "any", "writes:out", "writes:out", force=True) + knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") knl = lp.set_options(knl, write_cl=True) diff --git a/test/test_transform.py b/test/test_transform.py index feb8bf9df..d9ad97240 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -78,7 +78,7 @@ def 
test_collect_common_factors(ctx_factory): """ out_tmp = 0 {id=out_init,inames=i} out_tmp = out_tmp + alpha[i]*a[i,j]*b1[j] {id=out_up1,dep=out_init} - out_tmp = out_tmp + alpha[i]*a[j,i]*b2[j] {id=out_up2,dep=out_init} + out_tmp = out_tmp + alpha[i]*a[j,i]*b2[j] {id=out_up2,dep=out_init,nosync=} out[i] = out_tmp {dep=out_up1:out_up2} """) knl = lp.add_and_infer_dtypes(knl, -- GitLab From 97f3368e1c30419cb9ac59bbfb149041bdc88b0b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 17:20:44 -0600 Subject: [PATCH 10/40] Allow no_check value for enforce_variable_access_ordered option --- loopy/check.py | 3 +++ loopy/options.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index ac9ca4463..d8994b22d 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -438,6 +438,9 @@ def check_variable_access_ordered(kernel): * an explicit statement that no ordering is necessary (expressed through :attr:`loopy.Instruuction.no_sync_with`) """ + if kernel.options.enforce_variable_access_ordered == "no_check": + return + checked_variables = ( kernel.get_written_variables() | set(kernel.temporary_variables) diff --git a/loopy/options.py b/loopy/options.py index f23d72816..63089d94d 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -170,6 +170,8 @@ class Options(ImmutableRecord): Required for language versions 2018.1 and above. This check helps find and eliminate unintentionally unordered access to variables. + + If equal to ``"no_check"``, then no check is performed. 
""" _legacy_options_map = { -- GitLab From 890d1c0fbaccbeecc32afb03a4746b33ccb094bd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 17:22:00 -0600 Subject: [PATCH 11/40] Emit output assignments from reductions in defined order --- loopy/preprocess.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index ad119e94e..5e36e51a1 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -1861,9 +1861,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # An expansion happened, so insert the generated stuff plus # ourselves back into the queue. + result_assignment_dep_on = \ + insn.depends_on | frozenset(new_insn_add_depends_on) kwargs = insn.get_copy_kwargs( - depends_on=insn.depends_on - | frozenset(new_insn_add_depends_on), no_sync_with=insn.no_sync_with | frozenset(new_insn_add_no_sync_with), within_inames=( @@ -1871,6 +1871,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, | new_insn_add_within_inames)) kwargs.pop("id") + kwargs.pop("depends_on") kwargs.pop("expression") kwargs.pop("assignee", None) kwargs.pop("assignees", None) @@ -1878,20 +1879,27 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, kwargs.pop("temp_var_types", None) if isinstance(insn.expression, Reduction) and nresults > 1: + result_assignment_ids = [ + insn_id_gen(insn.id) for i in range(nresults)] replacement_insns = [ lp.Assignment( - id=insn_id_gen(insn.id), + id=result_assignment_ids[i], + depends_on=( + result_assignment_dep_on + | (frozenset([result_assignment_ids[i-1]]) + if i else frozenset())), assignee=assignee, expression=new_expr, **kwargs) - for assignee, new_expr in zip( - insn.assignees, new_expressions)] + for i, (assignee, new_expr) in enumerate(zip( + insn.assignees, new_expressions))] else: new_expr, = new_expressions replacement_insns = [ make_assignment( id=insn_id_gen(insn.id), + 
depends_on=result_assignment_dep_on, assignees=insn.assignees, expression=new_expr, **kwargs) -- GitLab From 45d329e78b18b816a5e49c9ac724c1ea0bde4430 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 17:23:43 -0600 Subject: [PATCH 12/40] Fix undefined order errors in tests --- test/test_apps.py | 13 +++++++------ test/test_fortran.py | 6 +----- test/test_loopy.py | 28 ++++++++++++++-------------- test/test_transform.py | 2 +- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/test/test_apps.py b/test/test_apps.py index 3be133d94..4707b7f07 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -258,7 +258,7 @@ def test_rob_stroud_bernstein_full(ctx_factory): <> w = s**(deg-alpha1) {id=init_w} <> tmp[alpha1,i2] = tmp[alpha1,i2] + w * coeffs[aind] \ - {id=write_tmp} + {id=write_tmp,dep=init_w:aind_init} for alpha2 w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ {id=update_w,dep=init_w:write_tmp} @@ -272,15 +272,16 @@ def test_rob_stroud_bernstein_full(ctx_factory): <> xi2 = qpts[0, i1_2] {dep=aind_incr} <> s2 = 1-xi2 <> r2 = xi2/s2 - <> w2 = s2**deg + <> w2 = s2**deg {id=w2_init} for alpha1_2 for i2_2 result[el, i1_2, i2_2] = result[el, i1_2, i2_2] + \ - w2 * tmp[alpha1_2, i2_2] + w2 * tmp[alpha1_2, i2_2] {id=res2,dep=w2_init} end - w2 = w2 * r2 * (deg-alpha1_2) / (1+alpha1_2) + w2 = w2 * r2 * (deg-alpha1_2) / (1+alpha1_2) \ + {id=w2_update, dep=res2} end end end @@ -491,9 +492,9 @@ def test_lbm(ctx_factory): f_new[i, j, 11] = + 0.25*m[8] - 0.125*m[10] - 0.25*m[11] end end - """, - lang_version=(2017, 2, 1)) + """) + knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") knl = lp.add_and_infer_dtypes(knl, {"f": np.float32}) ref_knl = knl diff --git a/test/test_fortran.py b/test/test_fortran.py index 842a0127e..ea2f68b61 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -405,15 +405,11 @@ def test_fuse_kernels(ctx_factory): fortran_template.format( inner=(xd_line + "\n" + yd_line), 
name="xyderiv")) - knl = lp.fuse_kernels((xderiv, yderiv)) + knl = lp.fuse_kernels((xderiv, yderiv), data_flow=[("result", 0, 1)]) knl = lp.prioritize_loops(knl, "e,i,j,k") assert len(knl.temporary_variables) == 2 - # This is needed for correctness, otherwise ordering could foul things up. - knl = lp.assignment_to_subst(knl, "prev") - knl = lp.assignment_to_subst(knl, "prev_0") - ctx = ctx_factory() lp.auto_test_vs_ref(xyderiv, ctx, knl, parameters=dict(nelements=20, ndofs=4)) diff --git a/test/test_loopy.py b/test/test_loopy.py index 8229f613e..273d0cfd0 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -893,8 +893,8 @@ def test_multiple_writes_to_local_temporary(): knl = lp.make_kernel( "{[i,e]: 0<=i<5 and 0<=e temp[i, 0] = 17 - temp[i, 1] = 15 + <> temp[i, 0] = 17 {nosync_query=writes:temp} + temp[i, 1] = 15 {nosync_query=writes:temp} """) knl = lp.tag_inames(knl, dict(i="l.0")) @@ -1849,7 +1849,7 @@ def test_nop(ctx_factory): <> z[i] = z[i+1] + z[i] {id=wr_z} <> v[i] = 11 {id=wr_v} ... 
nop {dep=wr_z:wr_v,id=yoink} - z[i] = z[i] - z[i+1] + v[i] + z[i] = z[i] - z[i+1] + v[i] {dep=yoink} end """) @@ -2110,11 +2110,11 @@ def test_if_else(ctx_factory): "{ [i]: 0<=i<50}", """ if i % 3 == 0 - a[i] = 15 + a[i] = 15 {nosync_query=writes:a} elif i % 3 == 1 - a[i] = 11 + a[i] = 11 {nosync_query=writes:a} else - a[i] = 3 + a[i] = 3 {nosync_query=writes:a} end """ ) @@ -2134,14 +2134,14 @@ def test_if_else(ctx_factory): for i if i % 2 == 0 if i % 3 == 0 - a[i] = 15 + a[i] = 15 {nosync_query=writes:a} elif i % 3 == 1 - a[i] = 11 + a[i] = 11 {nosync_query=writes:a} else - a[i] = 3 + a[i] = 3 {nosync_query=writes:a} end else - a[i] = 4 + a[i] = 4 {nosync_query=writes:a} end end """ @@ -2162,17 +2162,17 @@ def test_if_else(ctx_factory): if i < 25 for j if j % 2 == 0 - a[i, j] = 1 + a[i, j] = 1 {nosync_query=writes:a} else - a[i, j] = 0 + a[i, j] = 0 {nosync_query=writes:a} end end else for j if j % 2 == 0 - a[i, j] = 0 + a[i, j] = 0 {nosync_query=writes:a} else - a[i, j] = 1 + a[i, j] = 1 {nosync_query=writes:a} end end end diff --git a/test/test_transform.py b/test/test_transform.py index d9ad97240..08e1d789d 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -78,7 +78,7 @@ def test_collect_common_factors(ctx_factory): """ out_tmp = 0 {id=out_init,inames=i} out_tmp = out_tmp + alpha[i]*a[i,j]*b1[j] {id=out_up1,dep=out_init} - out_tmp = out_tmp + alpha[i]*a[j,i]*b2[j] {id=out_up2,dep=out_init,nosync=} + out_tmp = out_tmp + alpha[i]*a[j,i]*b2[j] {id=out_up2,dep=out_up1} out[i] = out_tmp {dep=out_up1:out_up2} """) knl = lp.add_and_infer_dtypes(knl, -- GitLab From c338e9bc5cd886dee10514a16f376bef225eb1a8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 17:44:18 -0600 Subject: [PATCH 13/40] Improve nosync resolution error message --- loopy/kernel/creation.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 36ab4f1dd..c351aa5a0 
100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1666,7 +1666,7 @@ def _is_wildcard(s): return any(c in s for c in WILDCARD_SYMBOLS) -def _resolve_dependencies(knl, insn, deps): +def _resolve_dependencies(what, knl, insn, deps): from loopy import find_instructions from loopy.match import MatchExpressionBase @@ -1692,10 +1692,11 @@ def _resolve_dependencies(knl, insn, deps): found_any = True if not found_any and knl.options.check_dep_resolution: - raise LoopyError("instruction '%s' declared a depency on '%s', " + raise LoopyError("instruction '%s' declared %s on '%s', " "which did not resolve to any instruction present in the " "kernel '%s'. Set the kernel option 'check_dep_resolution'" - "to False to disable this check." % (insn.id, dep, knl.name)) + "to False to disable this check." + % (insn.id, what, dep, knl.name)) for dep_id in new_deps: if dep_id not in knl.id_to_insn: @@ -1710,13 +1711,14 @@ def resolve_dependencies(knl): for insn in knl.instructions: new_insns.append(insn.copy( - depends_on=_resolve_dependencies(knl, insn, insn.depends_on), - no_sync_with=frozenset( - (resolved_insn_id, nosync_scope) - for nosync_dep, nosync_scope in insn.no_sync_with - for resolved_insn_id in - _resolve_dependencies(knl, insn, (nosync_dep,))), - )) + depends_on=_resolve_dependencies( + "a dependency", knl, insn, insn.depends_on), + no_sync_with=frozenset( + (resolved_insn_id, nosync_scope) + for nosync_dep, nosync_scope in insn.no_sync_with + for resolved_insn_id in + _resolve_dependencies("nosync", knl, insn, (nosync_dep,))), + )) return knl.copy(instructions=new_insns) -- GitLab From b1ca2b8f8256e97770b884ec6386ffbd71a3ce2b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 17:45:17 -0600 Subject: [PATCH 14/40] Relax ordering in test_collect_common_factors --- test/test_transform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_transform.py b/test/test_transform.py index 08e1d789d..f68db12f8 
100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -78,7 +78,8 @@ def test_collect_common_factors(ctx_factory): """ out_tmp = 0 {id=out_init,inames=i} out_tmp = out_tmp + alpha[i]*a[i,j]*b1[j] {id=out_up1,dep=out_init} - out_tmp = out_tmp + alpha[i]*a[j,i]*b2[j] {id=out_up2,dep=out_up1} + out_tmp = out_tmp + alpha[i]*a[j,i]*b2[j] \ + {id=out_up2,dep=out_init,nosync=out_up1} out[i] = out_tmp {dep=out_up1:out_up2} """) knl = lp.add_and_infer_dtypes(knl, -- GitLab From ae1e2e306fb9eeeaf8c0a2c1c75401a1f567ab8f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 17:53:10 -0600 Subject: [PATCH 15/40] Bump data model --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index d3b020361..d7b0ebc45 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,7 +32,7 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v76-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v77-islpy%s" % _islpy_version FALLBACK_LANGUAGE_VERSION = (2017, 2, 1) -- GitLab From 45f2df4e040e4d3ec5e57cc543b0dd3b6a5f0904 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 18:09:15 -0600 Subject: [PATCH 16/40] add_nosync: Add empty_ok flag --- loopy/transform/instruction.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 676a3db66..e6ecb4093 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -228,7 +228,8 @@ def tag_instructions(kernel, new_tag, within=None): # {{{ add nosync -def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): +def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False, + empty_ok=False): """Add a *no_sync_with* directive between *source* and *sink*. 
*no_sync_with* is only added if *sink* depends on *source* or if the instruction pair is in a conflicting group. @@ -248,8 +249,16 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive even without the presence of a dependency edge or conflicting instruction group. + :arg empty_ok: If *True*, do not complain even if no *nosync* tags were + added as a result of the transformation. :return: The updated kernel + + .. versionchanged:: 2018.1 + + If the transformation adds no *nosync* directives, it will complain. + This used to silently pass. This behavior can be restored using + *empty_ok*. """ if isinstance(source, str) and source in kernel.id_to_insn: @@ -264,9 +273,9 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): sinks = frozenset( sink.id for sink in find_instructions(kernel, sink)) - if not sources: + if not sources and not empty_ok: raise LoopyError("No match found for source specification '%s'." % source) - if not sinks: + if not sinks and not empty_ok: raise LoopyError("No match found for sink specification '%s'." % sink) def insns_in_conflicting_groups(insn1_id, insn2_id): @@ -295,7 +304,7 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): if bidirectional: nosync_to_add[source].add((sink, scope)) - if not nosync_to_add: + if not nosync_to_add and not empty_ok: raise LoopyError("No nosync annotations were added as a result " "of this call. add_nosync will (by default) only add them to " "accompany existing depencies or group exclusions. 
Maybe you want " -- GitLab From 0055116ca9d1c39d559fc15b0f8e61ca8cc3fe7e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 18:09:42 -0600 Subject: [PATCH 17/40] test_struct_assignment: Fix ordering assumptions --- test/test_loopy.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 273d0cfd0..3e1d2a631 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1873,7 +1873,7 @@ def test_global_barrier(ctx_factory): <> z[i] = z[i+1] + z[i] {id=wr_z,dep=top} <> v[i] = 11 {id=wr_v,dep=top} ... gbarrier {dep=wr_z:wr_v,id=yoink} - z[i] = z[i] - z[i+1] + v[i] {id=iupd} + z[i] = z[i] - z[i+1] + v[i] {id=iupd, dep=wr_z} end ... gbarrier {dep=iupd,id=postloop} z[i] = z[i] - z[i+1] + v[i] {dep=postloop} @@ -2573,10 +2573,10 @@ def test_struct_assignment(ctx_factory): "{ [i]: 0<=i Date: Fri, 9 Feb 2018 18:10:25 -0600 Subject: [PATCH 18/40] test_add_nosync: pass empty_ok in no-op test --- test/test_transform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_transform.py b/test/test_transform.py index f68db12f8..a234d7ac5 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -496,7 +496,8 @@ def test_add_nosync(): orig_knl = lp.set_temporary_scope(orig_knl, "tmp5", "local") # No dependency present - don't add nosync - knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2") + knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2", + empty_ok=True) assert frozenset() == knl.id_to_insn["insn2"].no_sync_with # Dependency present -- GitLab From c6fc57763056cdd88dddf035a3912d70ff0440bf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 20:33:12 -0600 Subject: [PATCH 19/40] Switch to import-based global language versioning scheme --- doc/tutorial.rst | 1 + examples/python/hello-loopy.py | 4 +--- loopy/kernel/creation.py | 38 +++++++++++++++++++++++++--------- loopy/version.py | 28 +++++++++++++++++++------ 
test/test_apps.py | 2 +- test/test_c_execution.py | 2 +- test/test_dg.py | 2 +- test/test_diff.py | 2 +- test/test_domain.py | 2 +- test/test_linalg.py | 2 +- test/test_loopy.py | 2 +- test/test_misc.py | 2 +- test/test_nbody.py | 2 +- test/test_numa_diff.py | 2 +- test/test_reduction.py | 2 +- test/test_scan.py | 2 +- test/test_sem_reagan.py | 2 +- test/test_target.py | 2 +- test/test_transform.py | 2 +- 19 files changed, 67 insertions(+), 34 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 7196dad86..7ac506806 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -25,6 +25,7 @@ import a few modules and set up a :class:`pyopencl.Context` and a >>> import loopy as lp >>> lp.set_caching_enabled(False) + >>> from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 >>> from warnings import filterwarnings, catch_warnings >>> filterwarnings('error', category=lp.LoopyWarning) diff --git a/examples/python/hello-loopy.py b/examples/python/hello-loopy.py index da1273d2b..6fa9b5fd3 100644 --- a/examples/python/hello-loopy.py +++ b/examples/python/hello-loopy.py @@ -2,15 +2,13 @@ import numpy as np import loopy as lp import pyopencl as cl import pyopencl.array +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # setup # ----- ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -# for make_kernel calls from this file -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) - n = 15 * 10**6 a = cl.array.arange(queue, n, dtype=np.float32) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index c351aa5a0..0daf327f4 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1922,10 +1922,12 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): If not given, this value defaults to version **(2017, 2, 1)** and a warning will be issued. - If this is impractical, you may also place a global variable - ``LOOPY_KERNEL_LANGUAGE_VERSION`` in the global namespace of the - function calling :func:`make_kernel`. 
If *lang_version* is not - explicitly given, that its value will be used. + To set the kernel version for all :mod:`loopy` kernels in a (Python) source + file, you may simply say:: + + from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 + + If *lang_version* is not explicitly given, that version value will be used. See also :ref:`language-versioning`. @@ -1981,17 +1983,26 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): if lang_version is None: # {{{ peek into caller's module to look for LOOPY_KERNEL_LANGUAGE_VERSION + from loopy.version import LANGUAGE_VERSION_SYMBOLS + # This *is* gross. But it seems like the right thing interface-wise. import inspect caller_globals = inspect.currentframe().f_back.f_globals - try: - lang_version = caller_globals["LOOPY_KERNEL_LANGUAGE_VERSION"] - except KeyError: - pass + for ver_sym in LANGUAGE_VERSION_SYMBOLS: + try: + lang_version = caller_globals[ver_sym] + break + except KeyError: + pass # }}} + import loopy.version + version_to_symbol = dict( + (getattr(loopy.version, lvs), lvs) + for lvs in LANGUAGE_VERSION_SYMBOLS) + if lang_version is None: from warnings import warn from loopy.diagnostic import LoopyWarning @@ -2001,13 +2012,20 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): warn("'lang_version' was not passed to make_kernel(). " "To avoid this warning, pass " "lang_version={ver} in this invocation. " - "(Or set LOOPY_KERNEL_LANGUAGE_VERSION = {ver} in " + "(Or say 'from loopy.version import " + "{sym_ver}' in " "the global scope of the calling frame.)" - .format(ver=MOST_RECENT_LANGUAGE_VERSION), + .format( + ver=MOST_RECENT_LANGUAGE_VERSION, + sym_ver=version_to_symbol[MOST_RECENT_LANGUAGE_VERSION] + ), LoopyWarning, stacklevel=2) lang_version = FALLBACK_LANGUAGE_VERSION + if lang_version not in version_to_symbol: + raise LoopyError("Language version '%s' is not known." 
% lang_version) + if lang_version >= (2018, 1): options = options.copy(enforce_variable_access_ordered=True) diff --git a/loopy/version.py b/loopy/version.py index d7b0ebc45..aeb0b277a 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -38,6 +38,14 @@ DATA_MODEL_VERSION = "v77-islpy%s" % _islpy_version FALLBACK_LANGUAGE_VERSION = (2017, 2, 1) MOST_RECENT_LANGUAGE_VERSION = (2018, 1) +LOOPY_USE_LANGUAGE_VERSION_2018_1 = (2018, 1) +LOOPY_USE_LANGUAGE_VERSION_2017_2_1 = (2017, 2, 1) + +LANGUAGE_VERSION_SYMBOLS = [ + "LOOPY_USE_LANGUAGE_VERSION_2018_1", + "LOOPY_USE_LANGUAGE_VERSION_2017_2_1", + ] + __doc__ = """ .. currentmodule:: loopy @@ -64,10 +72,13 @@ language version to let them take advantage of this check. As a result, :mod:`loopy` will now issue a warning when a call to :func:`loopy.make_kernel` does not declare a language version. Such kernels will (indefinitely) default to language version 2017.2.1. If passing a -language version to :func:`make_kernel` is impractical, you may also place a -global variable ``LOOPY_KERNEL_LANGUAGE_VERSION`` in the global namespace of -the function calling :func:`make_kernel`. If *lang_version* is not explicitly -given, this value will be used. +language version to :func:`make_kernel` is impractical, you may also import +one of the ``LOOPY_USE_LANGUAGE_VERSION_...`` symbols given below using:: + + from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 + +in the global namespace of the function calling :func:`make_kernel`. If +*lang_version* in that call is not explicitly given, this value will be used. Language versions will generally reflect the version number of :mod:`loopy` in which they were introduced, though it is likely that most versions of @@ -81,11 +92,16 @@ will work hard to avoid backward-incompatible language changes.) example **(2018, 1)**. Direct comparison of these tuples will always yield valid version comparisons. 
+ History of Language Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* ``(2018, 1)``: :attr:`loopy.Options.enforce_variable_access_ordered` +.. data:: LOOPY_USE_LANGUAGE_VERSION_2018_1 + + :attr:`loopy.Options.enforce_variable_access_ordered` is turned on by default. -* ``(2017, 2, 1)``: Initial legacy language version. +.. data:: LOOPY_USE_LANGUAGE_VERSION_2017_2_1 + + Initial legacy language version. """ diff --git a/test/test_apps.py b/test/test_apps.py index 4707b7f07..1be7edec1 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -49,7 +49,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa # {{{ convolutions diff --git a/test/test_c_execution.py b/test/test_c_execution.py index 582f3a105..f653eb0dc 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -40,7 +40,7 @@ else: faulthandler.enable() -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_c_target(): diff --git a/test/test_dg.py b/test/test_dg.py index 6688362aa..ae725ab49 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -34,7 +34,7 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_dg_volume(ctx_factory): diff --git a/test/test_diff.py b/test/test_diff.py index 8a4fe9587..3d19721ac 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -48,7 +48,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_diff(ctx_factory): diff --git a/test/test_domain.py b/test/test_domain.py index d8a83007a..680ff2992 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -52,7 +52,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import 
LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_assume(ctx_factory): diff --git a/test/test_linalg.py b/test/test_linalg.py index 7eba5facb..accdebc12 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -62,7 +62,7 @@ def check_float4(result, ref_result): ref_result[comp], result[comp], rtol=1e-3, atol=1e-3), None -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_axpy(ctx_factory): diff --git a/test/test_loopy.py b/test/test_loopy.py index 3e1d2a631..b876cdb55 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -52,7 +52,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_globals_decl_once_with_multi_subprogram(ctx_factory): diff --git a/test/test_misc.py b/test/test_misc.py index c1ae6c532..ec14770a9 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -32,7 +32,7 @@ import logging logger = logging.getLogger(__name__) -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_compute_sccs(): diff --git a/test/test_nbody.py b/test/test_nbody.py index f231dfd5b..f2a8fc198 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -34,7 +34,7 @@ import logging logger = logging.getLogger(__name__) -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_nbody(ctx_factory): diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 7bacad75f..a287ad59d 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -44,7 +44,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa @pytest.mark.parametrize("Nq", [7]) diff --git a/test/test_reduction.py b/test/test_reduction.py index 1ddbbfebf..6b62bad5b 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -49,7 +49,7 @@ 
__all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_nonsense_reduction(ctx_factory): diff --git a/test/test_scan.py b/test/test_scan.py index 228453fd8..44903611d 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -56,7 +56,7 @@ __all__ = [ # - scan(a) + scan(b) # - test for badly tagged inames -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa @pytest.mark.parametrize("n", [1, 2, 3, 16]) diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index a92f2b2ab..ecb2352ae 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -31,7 +31,7 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_tim2d(ctx_factory): diff --git a/test/test_target.py b/test/test_target.py index 71a2548c1..c143fbbd2 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -52,7 +52,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_ispc_target(occa_mode=False): diff --git a/test/test_transform.py b/test/test_transform.py index a234d7ac5..e1a58e302 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -49,7 +49,7 @@ __all__ = [ ] -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_chunk_iname(ctx_factory): -- GitLab From 04eeb95bb5166fa8b29f0ffd04ac75d5d53af1b7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 20:33:28 -0600 Subject: [PATCH 20/40] Switch doctests to run on Py3 --- .gitlab-ci.yml | 6 ++-- doc/tutorial.rst | 85 ++++++++++++++++++++++++++---------------------- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml 
index c76590f62..c2b796dfc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,6 +4,7 @@ Python 2.7 AMD CPU: - export PYOPENCL_TEST=amd:pu - export EXTRA_INSTALL="numpy mako" - export LOOPY_NO_CACHE=1 + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -18,6 +19,7 @@ Python 2.6 POCL: - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="numpy mako" - export LOOPY_NO_CACHE=1 + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -31,7 +33,6 @@ Python 3.5 AMD CPU: - export PY_EXE=python3.5 - export PYOPENCL_TEST=amd:pu - export EXTRA_INSTALL="numpy mako" - - export NO_DOCTESTS=1 - export LOOPY_NO_CACHE=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -47,6 +48,7 @@ Python 2.7 POCL: - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="numpy mako" - export LOOPY_NO_CACHE=1 + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -77,7 +79,6 @@ Python 3.6 POCL: - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="numpy mako" - export LOOPY_NO_CACHE=1 - - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -91,7 +92,6 @@ Python 3.6 POCL Twice With Cache: - export PY_EXE=python3.6 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="numpy mako" - - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" - "cd .." 
diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 7ac506806..217e1ef7c 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1158,7 +1158,7 @@ this, :mod:`loopy` will complain that global barrier needs to be inserted: >>> cgr = lp.generate_code_v2(knl) Traceback (most recent call last): ... - MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' instruction option to state that no synchronization is needed) + loopy.diagnostic.MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' instruction option to state that no synchronization is needed) The syntax for a inserting a global barrier instruction is ``... gbarrier``. :mod:`loopy` also supports manually inserting local @@ -1201,7 +1201,7 @@ Here is what happens when we try to generate code for the kernel: >>> cgr = lp.generate_code_v2(knl) Traceback (most recent call last): ... - MissingDefinitionError: temporary variable 'tmp' gets used in subkernel 'rotate_v2_0' without a definition (maybe you forgot to call loopy.save_and_reload_temporaries?) + loopy.diagnostic.MissingDefinitionError: temporary variable 'tmp' gets used in subkernel 'rotate_v2_0' without a definition (maybe you forgot to call loopy.save_and_reload_temporaries?) This happens due to the kernel splitting done by :mod:`loopy`. The splitting happens when the instruction schedule is generated. To see the schedule, we @@ -1397,7 +1397,7 @@ Attempting to create this kernel results in an error: ... # While trying to find shape axis 0 of argument 'out', the following exception occurred: Traceback (most recent call last): ... 
- StaticValueFindingError: a static maximum was not found for PwAff '[n] -> { [(1)] : n <= 1; [(n)] : n >= 2 }' + loopy.diagnostic.StaticValueFindingError: a static maximum was not found for PwAff '[n] -> { [(1)] : n <= 1; [(n)] : n >= 2 }' The problem is that loopy cannot find a simple, universally valid expression for the length of *out* in this case. Notice how the kernel accesses both the @@ -1463,7 +1463,7 @@ sign that something is amiss: >>> evt, (out,) = knl(queue, a=a_mat_dev) Traceback (most recent call last): ... - WriteRaceConditionWarning: in kernel transpose: instruction 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable) + loopy.diagnostic.WriteRaceConditionWarning: in kernel transpose: instruction 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable) When we ask to see the code, the issue becomes apparent: @@ -1546,20 +1546,18 @@ containing different types of data: >>> knl = lp.add_and_infer_dtypes(knl, ... dict(a=np.float32, b=np.float32, g=np.float64, h=np.float64)) -Note that loopy will infer the data types for arrays c and e from the +Note that loopy will infer the data types for arrays ``c`` and ``e`` from the information provided. Now we will count the operations: .. 
doctest:: >>> op_map = lp.get_op_map(knl) >>> print(lp.stringify_stats_mapping(op_map)) - Op(np:dtype('float32'), add) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('float32'), div) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('float32'), mul) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('float64'), add) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('float64'), mul) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('int32'), add) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } - + Op(np:dtype('float32'), add) : ... + +Each line of output will look roughly like:: + + Op(np:dtype('float32'), add) : [l, m, n] -> { l * m * n : l > 0 and m > 0 and n > 0 } :func:`loopy.get_op_map` returns a :class:`loopy.ToCountMap` of **{** :class:`loopy.Op` **:** :class:`islpy.PwQPolynomial` **}**. A @@ -1616,15 +1614,18 @@ together into keys containing only the specified fields: >>> op_map_dtype = op_map.group_by('dtype') >>> print(lp.stringify_stats_mapping(op_map_dtype)) - Op(np:dtype('float32'), None) : [m, l, n] -> { 3 * m * l * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('float64'), None) : [m, l, n] -> { 2 * m * n : m > 0 and l > 0 and n > 0 } - Op(np:dtype('int32'), None) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } + Op(np:dtype('float32'), None) : ... >>> f32op_count = op_map_dtype[lp.Op(dtype=np.float32) ... ].eval_with_dict(param_dict) >>> print(f32op_count) 1572864 +The lines of output above might look like:: + + Op(np:dtype('float32'), None) : [m, l, n] -> { 3 * m * l * n : m > 0 and l > 0 and n > 0 } + Op(np:dtype('float64'), None) : [m, l, n] -> { 2 * m * n : m > 0 and l > 0 and n > 0 } + See the reference page for :class:`loopy.ToCountMap` and :class:`loopy.Op` for more information on these functions. 
@@ -1639,13 +1640,15 @@ we'll continue using the kernel from the previous example: >>> mem_map = lp.get_mem_access_map(knl) >>> print(lp.stringify_stats_mapping(mem_map)) + MemAccess(global, np:dtype('float32'), 0, load, a) : ... + + +Each line of output will look roughly like:: + + MemAccess(global, np:dtype('float32'), 0, load, a) : [m, l, n] -> { 2 * m * l * n : m > 0 and l > 0 and n > 0 } MemAccess(global, np:dtype('float32'), 0, load, b) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 } MemAccess(global, np:dtype('float32'), 0, store, c) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), 0, load, g) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), 0, load, h) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), 0, store, e) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 } - :func:`loopy.get_mem_access_map` returns a :class:`loopy.ToCountMap` of **{** :class:`loopy.MemAccess` **:** :class:`islpy.PwQPolynomial` **}**. @@ -1694,18 +1697,13 @@ using :func:`loopy.ToCountMap.to_bytes` and :func:`loopy.ToCountMap.group_by`: >>> bytes_map = mem_map.to_bytes() >>> print(lp.stringify_stats_mapping(bytes_map)) - MemAccess(global, np:dtype('float32'), 0, load, a) : [m, l, n] -> { 8 * m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float32'), 0, load, b) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float32'), 0, store, c) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), 0, load, g) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), 0, load, h) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), 0, store, e) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float32'), 0, load, a) : ... 
>>> global_ld_st_bytes = bytes_map.filter_by(mtype=['global'] ... ).group_by('direction') >>> print(lp.stringify_stats_mapping(global_ld_st_bytes)) - MemAccess(None, None, None, load, None) : [m, l, n] -> { (16 * m + 12 * m * l) * n : m > 0 and l > 0 and n > 0 } - MemAccess(None, None, None, store, None) : [m, l, n] -> { (8 * m + 4 * m * l) * n : m > 0 and l > 0 and n > 0 } + MemAccess(None, None, None, load, None) : ... + MemAccess(None, None, None, store, None) : ... >>> loaded = global_ld_st_bytes[lp.MemAccess(direction='load') ... ].eval_with_dict(param_dict) @@ -1715,6 +1713,15 @@ using :func:`loopy.ToCountMap.to_bytes` and :func:`loopy.ToCountMap.group_by`: bytes loaded: 7340032 bytes stored: 2621440 +The lines of output above might look like:: + + MemAccess(global, np:dtype('float32'), 0, load, a) : [m, l, n] -> { 8 * m * l * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float32'), 0, load, b) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float32'), 0, store, c) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float64'), 0, load, g) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float64'), 0, load, h) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float64'), 0, store, e) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + One can see how these functions might be useful in computing, for example, achieved memory bandwidth in byte/sec or performance in FLOP/sec. @@ -1732,12 +1739,12 @@ resulting :class:`islpy.PwQPolynomial` will be more complicated this time. ... outer_tag="l.1", inner_tag="l.0") >>> mem_map = lp.get_mem_access_map(knl_consec) >>> print(lp.stringify_stats_mapping(mem_map)) - MemAccess(global, np:dtype('float32'), 1, load, a) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float32'), 1, load, b) : [m, l, n] -> { ...
} - MemAccess(global, np:dtype('float32'), 1, store, c) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float64'), 1, load, g) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float64'), 1, load, h) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float64'), 1, store, e) : [m, l, n] -> { ... } + MemAccess(global, np:dtype('float32'), 1, load, a) : ... + MemAccess(global, np:dtype('float32'), 1, load, b) : ... + MemAccess(global, np:dtype('float32'), 1, store, c) : ... + MemAccess(global, np:dtype('float64'), 1, load, g) : ... + MemAccess(global, np:dtype('float64'), 1, load, h) : ... + MemAccess(global, np:dtype('float64'), 1, store, e) : ... With this parallelization, consecutive threads will access consecutive array @@ -1773,12 +1780,12 @@ switch the inner and outer tags in our parallelization of the kernel: ... outer_tag="l.0", inner_tag="l.1") >>> mem_map = lp.get_mem_access_map(knl_nonconsec) >>> print(lp.stringify_stats_mapping(mem_map)) - MemAccess(global, np:dtype('float32'), 128, load, a) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float32'), 128, load, b) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float32'), 128, store, c) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float64'), 128, load, g) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float64'), 128, load, h) : [m, l, n] -> { ... } - MemAccess(global, np:dtype('float64'), 128, store, e) : [m, l, n] -> { ... } + MemAccess(global, np:dtype('float32'), 128, load, a) : ... + MemAccess(global, np:dtype('float32'), 128, load, b) : ... + MemAccess(global, np:dtype('float32'), 128, store, c) : ... + MemAccess(global, np:dtype('float64'), 128, load, g) : ... + MemAccess(global, np:dtype('float64'), 128, load, h) : ... + MemAccess(global, np:dtype('float64'), 128, store, e) : ... 
With this parallelization, consecutive threads will access *nonconsecutive* -- GitLab From 5940f863fa091ab8744c9fcbefb4c96ccfc1edda Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 20:41:36 -0600 Subject: [PATCH 21/40] Add test for variable ordering check --- loopy/check.py | 19 ++++++++++++++++++- loopy/diagnostic.py | 4 ++++ test/test_loopy.py | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index d8994b22d..2fa9874b1 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -250,6 +250,8 @@ def check_for_data_dependent_parallel_bounds(kernel): % (i, par, ", ".join(par_inames))) +# {{{ check access bounds + class _AccessCheckMapper(WalkMapper): def __init__(self, kernel, domain, insn_id): self.kernel = kernel @@ -340,6 +342,10 @@ def check_bounds(kernel): insn.with_transformed_expressions(run_acm) +# }}} + + +# {{{ check write destinations def check_write_destinations(kernel): for insn in kernel.instructions: @@ -363,6 +369,10 @@ def check_write_destinations(kernel): or wvar in kernel.arg_dict) and wvar not in kernel.all_params(): raise LoopyError +# }}} + + +# {{{ check_has_schedulable_iname_nesting def check_has_schedulable_iname_nesting(kernel): from loopy.transform.iname import (has_schedulable_iname_nesting, @@ -379,6 +389,10 @@ def check_has_schedulable_iname_nesting(kernel): "to get hints about which iname to duplicate. 
Here are some " "options:\n%s" % opt_str) +# }}} + + +# {{{ check_variable_access_ordered class IndirectDependencyEdgeFinder(object): def __init__(self, kernel): @@ -499,7 +513,8 @@ def check_variable_access_ordered(kernel): other_id=other_id, var=name)) if kernel.options.enforce_variable_access_ordered: - raise LoopyError(msg) + from loopy.diagnostic import VariableAccessNotOrdered + raise VariableAccessNotOrdered(msg) else: from loopy.diagnostic import warn_with_kernel warn_with_kernel( @@ -507,6 +522,8 @@ def check_variable_access_ordered(kernel): # }}} +# }}} + def pre_schedule_checks(kernel): try: diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 4868f70af..c2b78f4d7 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -115,6 +115,10 @@ class LoopyTypeError(LoopyError): class ExpressionNotAffineError(LoopyError): pass + +class VariableAccessNotOrdered(LoopyError): + pass + # }}} diff --git a/test/test_loopy.py b/test/test_loopy.py index b876cdb55..931436959 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2791,6 +2791,21 @@ def test_add_prefetch_works_in_lhs_index(): assert "a1_map" not in get_dependencies(insn.assignees) +def test_check_for_variable_access_ordering(): + knl = lp.make_kernel( + "{[i]: 0<=i 1: exec(sys.argv[1]) -- GitLab From e1f0101a069a11258b0050bec5dbe55898b9957e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 9 Feb 2018 23:00:48 -0600 Subject: [PATCH 22/40] Skip doctests on Py2 legacy PyOpenCL --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c2b796dfc..4d4f7e528 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -64,6 +64,7 @@ Python 2.7 with legacy PyOpenCL: - export EXTRA_INSTALL="numpy mako" - export REQUIREMENTS_TXT="requirements-old-pyopencl.txt" - export LOOPY_NO_CACHE=1 + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" tags: -- GitLab From 382f2d8920772d97e82e8844c200daf573626c6c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 00:30:25 -0600 Subject: [PATCH 23/40] Make check_variable_access_ordered work for aliased accesses --- loopy/check.py | 34 +++++++++++------- loopy/kernel/tools.py | 80 +++++++++++++++++++++++++++++++++++++++++++ test/test_loopy.py | 19 ++++++++++ 3 files changed, 121 insertions(+), 12 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 2fa9874b1..2b6b88dbc 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -450,26 +450,30 @@ def check_variable_access_ordered(kernel): * an (at least indirect) depdendency edge, or * an explicit statement that no ordering is necessary (expressed - through :attr:`loopy.Instruuction.no_sync_with`) + through :attr:`loopy.Instruction.no_sync_with`) """ if kernel.options.enforce_variable_access_ordered == "no_check": return - checked_variables = ( - kernel.get_written_variables() - | set(kernel.temporary_variables) - | set(arg for arg in kernel.arg_dict)) + checked_variables = kernel.get_written_variables() & ( + set(kernel.temporary_variables) | set(arg for arg in kernel.arg_dict)) wmap = kernel.writer_map() rmap = kernel.reader_map() from loopy.kernel.data import GlobalArg, ValueArg, temp_var_scope + from loopy.kernel.tools import find_aliasing_equivalence_classes depfind = IndirectDependencyEdgeFinder(kernel) + aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel) for name in checked_variables: - readers = rmap.get(name, set()) - writers = wmap.get(name, set()) + eq_class = aliasing_equiv_classes[name] + + readers = set.union( + *[rmap.get(eq_name, set()) for eq_name in eq_class]) + writers = set.union( + *[wmap.get(eq_name, set()) for eq_name in eq_class]) if not writers: continue @@ -503,15 +507,21 @@ def check_variable_access_ordered(kernel): if not has_dependency_relationship: msg = ("No dependency relationship found between " - "'{writer_id}' 
which writes '{var}' and " - "'{other_id}' which also accesses '{var}'. " - "Please either add a (possibly indirect) dependency " - "between the two, or add one to the other's no_sync set " + "'{writer_id}' which writes {var} and " + "'{other_id}' which also accesses {var}. " + "Either add a (possibly indirect) dependency " + "between the two, or add one to the other's nosync set " "to indicate that no ordering is intended. " .format( writer_id=writer_id, other_id=other_id, - var=name)) + var=( + "the variable '%s'" % name + if len(eq_class) == 1 + else ( + "the aliasing equivalence class '%s'" + % ", ".join(eq_class)) + ))) if kernel.options.enforce_variable_access_ordered: from loopy.diagnostic import VariableAccessNotOrdered raise VariableAccessNotOrdered(msg) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index fbc4238c2..4f0b805e1 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1731,4 +1731,84 @@ def get_subkernel_to_insn_id_map(kernel): # }}} +# {{{ find aliasing equivalence classes + +class DisjointSets(object): + """ + .. automethod:: __getitem__ + .. automethod:: find_leader_or_create_group + .. automethod:: union + .. automethod:: union_many + """ + + # https://en.wikipedia.org/wiki/Disjoint-set_data_structure + + def __init__(self): + self.leader_to_group = {} + self.element_to_leader = {} + + def __getitem__(self, item): + """ + :arg item: A representative of an equivalence class. 
+ :returns: the equivalence class, given as a set of elements + """ + try: + leader = self.element_to_leader[item] + except KeyError: + return set([item]) + else: + return self.leader_to_group[leader] + + def find_leader_or_create_group(self, el): + try: + return self.element_to_leader[el] + except KeyError: + pass + + self.element_to_leader[el] = el + self.leader_to_group[el] = set([el]) + return el + + def union(self, a, b): + leader_a = self.find_leader_or_create_group(a) + leader_b = self.find_leader_or_create_group(b) + + if leader_a == leader_b: + return + + new_leader = leader_a + + for b_el in self.leader_to_group[leader_b]: + self.element_to_leader[b_el] = new_leader + + self.leader_to_group[leader_a].update(self.leader_to_group[leader_b]) + del self.leader_to_group[leader_b] + + def union_many(self, relation): + """ + :arg relation: an iterable of 2-tuples enumerating the elements of the + relation. The relation is assumed to be an equivalence relation + (transitive, reflexive, symmetric) but need not explicitly contain + all elements to make it that. + + The first elements of the tuples become group leaders. 
+ + :returns: *self* + """ + + for a, b in relation: + self.union(a, b) + + return self + + +def find_aliasing_equivalence_classes(kernel): + return DisjointSets().union_many( + (tv.base_storage, tv.name) + for tv in six.itervalues(kernel.temporary_variables) + if tv.base_storage is not None) + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 931436959..e1de0af80 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2806,6 +2806,25 @@ def test_check_for_variable_access_ordering(): lp.get_one_scheduled_kernel(knl) +def test_check_for_variable_access_ordering_with_aliasing(): + knl = lp.make_kernel( + "{[i]: 0<=i 1: exec(sys.argv[1]) -- GitLab From ff36086d2d70d30142d14fdb650b130b94b65a04 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 01:37:04 -0600 Subject: [PATCH 24/40] Convert left-over language version decl in test_statistics --- test/test_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_statistics.py b/test/test_statistics.py index 9427a4edf..e4232e613 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -34,7 +34,7 @@ import numpy as np from pymbolic.primitives import Variable -LOOPY_KERNEL_LANGUAGE_VERSION = (2018, 1) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa def test_op_counter_basic(): -- GitLab From ce321e9b446637a8e60a7415ec572d0d88f9a8cc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 01:40:26 -0600 Subject: [PATCH 25/40] Restructure check_variable_access_ordered to operate in terms of instructions, not IDs --- loopy/check.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 2b6b88dbc..ebe919838 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -421,10 +421,7 @@ class IndirectDependencyEdgeFinder(object): return False -def needs_no_sync_with(kernel, var_scope, dep_a_id, dep_b_id): - dep_a = 
kernel.id_to_insn[dep_a_id] - dep_b = kernel.id_to_insn[dep_b_id] - +def needs_no_sync_with(kernel, var_scope, dep_a, dep_b): from loopy.kernel.data import temp_var_scope if var_scope == temp_var_scope.GLOBAL: search_scopes = ["global", "any"] @@ -498,12 +495,16 @@ def check_variable_access_ordered(kernel): if writer_id == other_id: continue + writer = kernel.id_to_insn[writer_id] + other = kernel.id_to_insn[other_id] + has_dependency_relationship = ( - needs_no_sync_with(kernel, scope, other_id, writer_id) + needs_no_sync_with(kernel, scope, other, writer) or depfind(writer_id, other_id) or - depfind(other_id, writer_id)) + depfind(other_id, writer_id) + ) if not has_dependency_relationship: msg = ("No dependency relationship found between " -- GitLab From a8b92da0c75849dbb1b33da70d6de306d5bde482 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 01:41:15 -0600 Subject: [PATCH 26/40] check_variable_access_ordered: Require nosync in both directions --- loopy/check.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index ebe919838..bd79cbea9 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -432,13 +432,16 @@ def needs_no_sync_with(kernel, var_scope, dep_a, dep_b): else: raise ValueError("unexpected value of 'temp_var_scope'") + ab_nosync = False + ba_nosync = False + for scope in search_scopes: - if (dep_a_id, scope) in dep_b.no_sync_with: - return True - if (dep_b_id, scope) in dep_a.no_sync_with: - return True + if (dep_a.id, scope) in dep_b.no_sync_with: + ab_nosync = True + if (dep_b.id, scope) in dep_a.no_sync_with: + ba_nosync = True - return False + return ab_nosync and ba_nosync def check_variable_access_ordered(kernel): -- GitLab From 82dd34a29a62f0f3ea4702f1e6713374cd6ccbe3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 02:45:32 -0600 Subject: [PATCH 27/40] Dependency arrows: use only one column per dependee --- loopy/kernel/tools.py | 98 
+++++++++++++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 4f0b805e1..15db06ad7 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1278,14 +1278,14 @@ def draw_dependencies_as_unicode_arrows( for insn in instructions: for dep in insn.depends_on: - reverse_deps.setdefault(dep, []).append(insn.id) + reverse_deps.setdefault(dep, set()).add(insn.id) # mapping of (from_id, to_id) tuples to column_index dep_to_column = {} # {{{ find column assignments - # mapping from column indices to (end_insn_id, updown) + # mapping from column indices to (end_insn_ids, pointed_at_insn_id) columns_in_use = {} n_columns = [0] @@ -1299,47 +1299,101 @@ def draw_dependencies_as_unicode_arrows( row.append(" ") return i - def do_flag_downward(s, updown): - if flag_downward and updown == "down": + def do_flag_downward(s, pointed_at_insn_id): + if flag_downward and pointed_at_insn_id not in processed_ids: return fore.RED+s+style.RESET_ALL else: return s def make_extender(): result = n_columns[0] * [" "] - for col, (_, updown) in six.iteritems(columns_in_use): - result[col] = do_flag_downward(u"│", updown) + for col, (_, pointed_at_insn_id) in six.iteritems(columns_in_use): + result[col] = do_flag_downward(u"│", pointed_at_insn_id) return result + processed_ids = set() + rows = [] for insn in instructions: row = make_extender() - for rdep in reverse_deps.get(insn.id, []): - assert rdep != insn.id + # {{{ add rdeps for already existing columns - dep_key = (rdep, insn.id) - if dep_key not in dep_to_column: - col = dep_to_column[dep_key] = find_free_column() - columns_in_use[col] = (rdep, "up") - row[col] = u"↱" + rdeps = reverse_deps.get(insn.id, set()).copy() - processed_ids + assert insn.id not in rdeps + + if insn.id in dep_to_column: + columns_in_use[insn.id][0].update(rdeps) + + # }}} + + # {{{ add deps for already existing columns + + for dep in insn.depends_on: + dep_key = 
dep + if dep_key in dep_to_column: + col = dep_to_column[dep] + columns_in_use[col][0].add(insn.id) + + # }}} + + for col, (starts, pointed_at_insn_id) in list(six.iteritems(columns_in_use)): + if insn.id == pointed_at_insn_id: + if starts: + # will continue downward + row[col] = do_flag_downward(u">", pointed_at_insn_id) + else: + # stops here + + # placeholder, pending deletion + columns_in_use[col] = None + + row[col] = do_flag_downward(u"↳", pointed_at_insn_id) + + elif insn.id in starts: + starts.remove(insn.id) + if starts: + # will continue downward + row[col] = do_flag_downward(u"├", pointed_at_insn_id) + + else: + # stops here + row[col] = u"└" + # placeholder, pending deletion + columns_in_use[col] = None + + # {{{ start arrows by reverse dep + + dep_key = insn.id + if dep_key not in dep_to_column and rdeps: + col = dep_to_column[dep_key] = find_free_column() + columns_in_use[col] = (rdeps, insn.id) + row[col] = u"↱" + + # }}} + + # {{{ start arrows by forward dep for dep in insn.depends_on: assert dep != insn.id - dep_key = (insn.id, dep) + dep_key = dep if dep_key not in dep_to_column: col = dep_to_column[dep_key] = find_free_column() - columns_in_use[col] = (dep, "down") - row[col] = do_flag_downward(u"┌", "down") + columns_in_use[col] = (set([insn.id]), dep) + row[col] = do_flag_downward(u"┌", dep) - for col, (end, updown) in list(six.iteritems(columns_in_use)): - if insn.id == end: + # }}} + + # {{{ delete columns_in_use entry for end-of-life columns + + for col, value in list(six.iteritems(columns_in_use)): + if value is None: del columns_in_use[col] - if updown == "up": - row[col] = u"└" - else: - row[col] = do_flag_downward(u"↳", updown) + + # }}} + + processed_ids.add(insn.id) extender = make_extender() -- GitLab From b2c491b6137fb06175ab3d733cf3ba33884c9ffd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 20:19:24 -0600 Subject: [PATCH 28/40] check_variable_access_ordered: Improve error message --- loopy/check.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index bd79cbea9..a45e8ee0c 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -514,8 +514,10 @@ def check_variable_access_ordered(kernel): "'{writer_id}' which writes {var} and " "'{other_id}' which also accesses {var}. " "Either add a (possibly indirect) dependency " - "between the two, or add one to the other's nosync set " - "to indicate that no ordering is intended. " + "between the two, or add them to each others' nosync " + "set to indicate that no ordering is intended, or " + "turn off this check by setting the " + "'enforce_variable_access_ordered' option" .format( writer_id=writer_id, other_id=other_id, -- GitLab From 71f66e64c40db814f8694b33f493907479a79226 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 20:49:59 -0600 Subject: [PATCH 29/40] check_variable_access_ordered: Only trigger if access ranges overlap --- loopy/check.py | 15 +++++++- loopy/symbolic.py | 89 +++++++++++++++++++++++++++++++++++++++------ test/test_apps.py | 2 +- test/test_loopy.py | 4 +- test/test_target.py | 2 - 5 files changed, 94 insertions(+), 18 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index a45e8ee0c..7f56a76ee 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -421,7 +421,7 @@ class IndirectDependencyEdgeFinder(object): return False -def needs_no_sync_with(kernel, var_scope, dep_a, dep_b): +def declares_nosync_with(kernel, var_scope, dep_a, dep_b): from loopy.kernel.data import temp_var_scope if var_scope == temp_var_scope.GLOBAL: search_scopes = ["global", "any"] @@ -455,6 +455,8 @@ def check_variable_access_ordered(kernel): if kernel.options.enforce_variable_access_ordered == "no_check": return + logger.debug("%s: check_variable_access_ordered: start" % kernel.name) + checked_variables = kernel.get_written_variables() & ( set(kernel.temporary_variables) | set(arg for arg in kernel.arg_dict)) @@ -493,6 +495,8 @@ def 
check_variable_access_ordered(kernel): # Check even for PRIVATE scope, to ensure intentional program order. + from loopy.symbolic import do_access_ranges_overlap_conservative + for writer_id in writers: for other_id in readers | writers: if writer_id == other_id: @@ -502,7 +506,7 @@ def check_variable_access_ordered(kernel): other = kernel.id_to_insn[other_id] has_dependency_relationship = ( - needs_no_sync_with(kernel, scope, other, writer) + declares_nosync_with(kernel, scope, other, writer) or depfind(writer_id, other_id) or @@ -510,6 +514,11 @@ def check_variable_access_ordered(kernel): ) if not has_dependency_relationship: + if not do_access_ranges_overlap_conservative( + kernel, writer_id, "w", other_id, "any", + name): + continue + msg = ("No dependency relationship found between " "'{writer_id}' which writes {var} and " "'{other_id}' which also accesses {var}. " @@ -536,6 +545,8 @@ def check_variable_access_ordered(kernel): warn_with_kernel( kernel, "variable_access_ordered", msg) + logger.debug("%s: check_variable_access_ordered: done" % kernel.name) + # }}} # }}} diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 9e16c3a59..272a7f45b 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1582,11 +1582,11 @@ def get_access_range(domain, subscript, assumptions): class BatchedAccessRangeMapper(WalkMapper): - def __init__(self, kernel, arg_names): + def __init__(self, kernel, var_names): self.kernel = kernel - self.arg_names = set(arg_names) - self.access_ranges = dict((arg, None) for arg in arg_names) - self.bad_subscripts = dict((arg, []) for arg in arg_names) + self.var_names = set(var_names) + self.access_ranges = dict((arg, None) for arg in var_names) + self.bad_subscripts = dict((arg, []) for arg in var_names) def map_subscript(self, expr, inames): domain = self.kernel.get_inames_domain(inames) @@ -1594,7 +1594,7 @@ class BatchedAccessRangeMapper(WalkMapper): assert isinstance(expr.aggregate, p.Variable) - if expr.aggregate.name not in 
self.arg_names: + if expr.aggregate.name not in self.var_names: return arg_name = expr.aggregate.name @@ -1622,7 +1622,7 @@ class BatchedAccessRangeMapper(WalkMapper): def map_linear_subscript(self, expr, inames): self.rec(expr.index, inames) - if expr.aggregate.name in self.arg_names: + if expr.aggregate.name in self.var_names: self.bad_subscripts[expr.aggregate.name].append(expr) def map_reduction(self, expr, inames): @@ -1634,20 +1634,87 @@ class BatchedAccessRangeMapper(WalkMapper): class AccessRangeMapper(object): - def __init__(self, kernel, arg_name): - self.arg_name = arg_name - self.inner_mapper = BatchedAccessRangeMapper(kernel, [arg_name]) + def __init__(self, kernel, var_name): + self.var_name = var_name + self.inner_mapper = BatchedAccessRangeMapper(kernel, [var_name]) def __call__(self, expr, inames): return self.inner_mapper(expr, inames) @property def access_range(self): - return self.inner_mapper.access_ranges[self.arg_name] + return self.inner_mapper.access_ranges[self.var_name] @property def bad_subscripts(self): - return self.inner_mapper.bad_subscripts[self.arg_name] + return self.inner_mapper.bad_subscripts[self.var_name] + +# }}} + + +# {{{ do_access_ranges_overlap + +def _get_access_range_conservative(kernel, insn_id, access_dir, var_name): + insn = kernel.id_to_insn[insn_id] + from loopy.kernel.instruction import MultiAssignmentBase + + assert access_dir in ["w", "any"] + + if not isinstance(insn, MultiAssignmentBase): + if access_dir == "any": + return var_name in insn.dependency_names() + else: + return var_name in insn.write_dependency_names() + + exprs = list(insn.assignees) + if access_dir == "any": + exprs.append(insn.expression) + exprs.extend(insn.predicates) + + arange = False + for expr in exprs: + arm = AccessRangeMapper(kernel, var_name) + arm(expr, kernel.insn_inames(insn)) + + if arm.bad_subscripts: + return True + + expr_arange = arm.access_range + if expr_arange is None: + continue + + if arange is False: + arange = 
expr_arange + else: + arange = arange | expr_arange + + return arange + + +def do_access_ranges_overlap_conservative( + kernel, insn1_id, insn1_dir, insn2_id, insn2_dir, var_name): + """Determine whether the access ranges to *var_name* in the two + given instructions overlap. This determination is made 'conservatively', + i.e. if precise information is unavailable, it is concluded that the + ranges overlap. + + :arg insn1_dir: either ``"w"`` or ``"any"``, to indicate which + type of access is desired--writing or any + :arg insn2_dir: either ``"w"`` or ``"any"`` + :returns: a :class:`bool` + """ + + insn1_arange = _get_access_range_conservative( + kernel, insn1_id, insn1_dir, var_name) + insn2_arange = _get_access_range_conservative( + kernel, insn2_id, insn2_dir, var_name) + + if insn1_arange is False or insn2_arange is False: + return False + if insn1_arange is True or insn2_arange is True: + return True + + return not (insn1_arange & insn2_arange).is_empty() # }}} diff --git a/test/test_apps.py b/test/test_apps.py index 1be7edec1..ff30e3e7a 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -494,7 +494,7 @@ def test_lbm(ctx_factory): end """) - knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") + #knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") knl = lp.add_and_infer_dtypes(knl, {"f": np.float32}) ref_knl = knl diff --git a/test/test_loopy.py b/test/test_loopy.py index e1de0af80..72c52a10e 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -893,8 +893,8 @@ def test_multiple_writes_to_local_temporary(): knl = lp.make_kernel( "{[i,e]: 0<=i<5 and 0<=e temp[i, 0] = 17 {nosync_query=writes:temp} - temp[i, 1] = 15 {nosync_query=writes:temp} + <> temp[i, 0] = 17 + temp[i, 1] = 15 """) knl = lp.tag_inames(knl, dict(i="l.0")) diff --git a/test/test_target.py b/test/test_target.py index c143fbbd2..15964987a 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -215,8 +215,6 @@ def test_random123(ctx_factory,
tp): out[i, 3] = real.s3 + 1j * imag.s3 """.replace("TYPE", tp)) - knl = lp.add_nosync(knl, "any", "writes:out", "writes:out", force=True) - knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") knl = lp.set_options(knl, write_cl=True) -- GitLab From a8958f2c7eeebafa198d45359146a2e1649107a1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 20:58:11 -0600 Subject: [PATCH 30/40] Check value of enforce_variable_access_ordered option --- loopy/check.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index 7f56a76ee..0a52044a6 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -452,6 +452,14 @@ def check_variable_access_ordered(kernel): * an explicit statement that no ordering is necessary (expressed through :attr:`loopy.Instruction.no_sync_with`) """ + if kernel.options.enforce_variable_access_ordered not in [ + "no_check", + True, + False]: + raise LoopyError("invalid value for option " + "'enforce_variable_access_ordered': %s" + % kernel.options.enforce_variable_access_ordered) + if kernel.options.enforce_variable_access_ordered == "no_check": return -- GitLab From 46a0535a0d0b88486ead9666984e45df556ab091 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 21:16:45 -0600 Subject: [PATCH 31/40] check_variable_access_ordered: Do not enforce ordering for aliasing-based relationships in different groups --- loopy/check.py | 77 +++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 0a52044a6..0c9308336 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -484,6 +484,8 @@ def check_variable_access_ordered(kernel): *[rmap.get(eq_name, set()) for eq_name in eq_class]) writers = set.union( *[wmap.get(eq_name, set()) for eq_name in eq_class]) + unaliased_readers = rmap.get(name, set()) + unaliased_writers = wmap.get(name, set()) if not writers: continue @@ -521,37 +523,54 @@ def 
check_variable_access_ordered(kernel): depfind(other_id, writer_id) ) - if not has_dependency_relationship: - if not do_access_ranges_overlap_conservative( + if has_dependency_relationship: + continue + + is_relationship_by_aliasing = not ( + writer_id in unaliased_writers + and (other_id in unaliased_writers + or other_id in unaliased_readers)) + + # Do not enforce ordering for disjoint access ranges + if (not is_relationship_by_aliasing + and not do_access_ranges_overlap_conservative( kernel, writer_id, "w", other_id, "any", - name): - continue + name)): + continue + + # Do not enforce ordering for aliasing-based relationships + # in different groups. + if (is_relationship_by_aliasing and ( + bool(writer.groups & other.conflicts_with_groups) + or + bool(other.groups & writer.conflicts_with_groups))): + continue - msg = ("No dependency relationship found between " - "'{writer_id}' which writes {var} and " - "'{other_id}' which also accesses {var}. " - "Either add a (possibly indirect) dependency " - "between the two, or add them to each others' nosync " - "set to indicate that no ordering is intended, or " - "turn off this check by setting the " - "'enforce_variable_access_ordered' option" - .format( - writer_id=writer_id, - other_id=other_id, - var=( - "the variable '%s'" % name - if len(eq_class) == 1 - else ( - "the aliasing equivalence class '%s'" - % ", ".join(eq_class)) - ))) - if kernel.options.enforce_variable_access_ordered: - from loopy.diagnostic import VariableAccessNotOrdered - raise VariableAccessNotOrdered(msg) - else: - from loopy.diagnostic import warn_with_kernel - warn_with_kernel( - kernel, "variable_access_ordered", msg) + msg = ("No dependency relationship found between " + "'{writer_id}' which writes {var} and " + "'{other_id}' which also accesses {var}. 
" + "Either add a (possibly indirect) dependency " + "between the two, or add them to each others' nosync " + "set to indicate that no ordering is intended, or " + "turn off this check by setting the " + "'enforce_variable_access_ordered' option" + .format( + writer_id=writer_id, + other_id=other_id, + var=( + "the variable '%s'" % name + if len(eq_class) == 1 + else ( + "the aliasing equivalence class '%s'" + % ", ".join(eq_class)) + ))) + if kernel.options.enforce_variable_access_ordered: + from loopy.diagnostic import VariableAccessNotOrdered + raise VariableAccessNotOrdered(msg) + else: + from loopy.diagnostic import warn_with_kernel + warn_with_kernel( + kernel, "variable_access_ordered", msg) logger.debug("%s: check_variable_access_ordered: done" % kernel.name) -- GitLab From a6d32d89de548f2106ad43c86dae3151001d0ec8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 10 Feb 2018 21:31:46 -0600 Subject: [PATCH 32/40] Fix test_missing_temporary_definition_detection for ordering check with aliasing --- test/test_loopy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 72c52a10e..86c8c6e43 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1611,8 +1611,10 @@ def test_missing_temporary_definition_detection(): def test_missing_definition_check_respects_aliases(): # Based on https://github.com/inducer/loopy/issues/69 knl = lp.make_kernel("{ [i] : 0<=i Date: Sat, 10 Feb 2018 22:23:26 -0600 Subject: [PATCH 33/40] Remove extraneous lang_version declaration --- test/test_dg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_dg.py b/test/test_dg.py index ae725ab49..e96c76d88 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -75,8 +75,7 @@ def test_dg_volume(ctx_factory): order=order), lp.ValueArg("K", np.int32, approximately=1000), ], - name="dg_volume", assumptions="K>=1", - lang_version=(2018, 1)) + name="dg_volume", assumptions="K>=1") knl = 
lp.fix_parameters(knl, Np=Np) -- GitLab From cc9450aefdd102ee1184a045bc9e8fb860a64c8f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 17:56:47 -0600 Subject: [PATCH 34/40] check_variable_access_ordered: Comment improvements --- loopy/check.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index 0c9308336..3db049616 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -450,7 +450,7 @@ def check_variable_access_ordered(kernel): * an (at least indirect) depdendency edge, or * an explicit statement that no ordering is necessary (expressed - through :attr:`loopy.Instruction.no_sync_with`) + through a bi-directional :attr:`loopy.Instruction.no_sync_with`) """ if kernel.options.enforce_variable_access_ordered not in [ "no_check", @@ -478,6 +478,9 @@ def check_variable_access_ordered(kernel): aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel) for name in checked_variables: + # This is a tad redundant in that this could probably be restructured + # to iterate only over equivalence classes and not individual variables. + # But then the access-range overlap check below would have to be smarter. 
eq_class = aliasing_equiv_classes[name] readers = set.union( -- GitLab From d260f4e7fae51e739af4583110fe4581bcf320bd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 17:57:52 -0600 Subject: [PATCH 35/40] Barrier insertion: Only emit barriers for overlapping access patterns --- loopy/schedule/__init__.py | 114 +++++++++++++++++++++++++------------ test/test_loopy.py | 49 ++++++++++++---- 2 files changed, 116 insertions(+), 47 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 850f0a61f..b196b343e 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1427,8 +1427,8 @@ class DependencyTracker(object): raise ValueError("unknown 'var_kind': %s" % var_kind) from collections import defaultdict - self.writer_map = defaultdict(set) - self.reader_map = defaultdict(set) + self.base_writer_map = defaultdict(set) + self.base_access_map = defaultdict(set) self.temp_to_base_storage = kernel.get_temporary_to_base_storage_map() def map_to_base_storage(self, var_names): @@ -1442,23 +1442,27 @@ class DependencyTracker(object): return result def discard_all_sources(self): - self.writer_map.clear() - self.reader_map.clear() + self.base_writer_map.clear() + self.base_access_map.clear() + + # Anything with 'base' in the name in this class contains names normalized + # to their 'base_storage'. def add_source(self, source): """ - Specify that an instruction may be used as the source of a dependency edge. + Specify that an instruction used as the source (depended-upon + part) of a dependency edge is of interest to this tracker. """ # If source is an insn ID, look up the actual instruction. 
source = self.kernel.id_to_insn.get(source, source) for written in self.map_to_base_storage( set(source.assignee_var_names()) & self.relevant_vars): - self.writer_map[written].add(source.id) + self.base_writer_map[written].add(source.id) for read in self.map_to_base_storage( - source.read_dependency_names() & self.relevant_vars): - self.reader_map[read].add(source.id) + source.dependency_names() & self.relevant_vars): + self.base_access_map[read].add(source.id) def gen_dependencies_with_target_at(self, target): """ @@ -1471,51 +1475,87 @@ class DependencyTracker(object): # If target is an insn ID, look up the actual instruction. target = self.kernel.id_to_insn.get(target, target) - tgt_write = self.map_to_base_storage( - set(target.assignee_var_names()) & self.relevant_vars) - tgt_read = self.map_to_base_storage( - target.read_dependency_names() & self.relevant_vars) - - for (accessed_vars, accessor_map) in [ - (tgt_read, self.writer_map), - (tgt_write, self.reader_map), - (tgt_write, self.writer_map)]: + for ( + tgt_dir, src_dir, src_base_var_to_accessor_map + ) in [ + ("any", "w", self.base_writer_map), + ("w", "any", self.base_access_map), + ]: for dep in self.get_conflicting_accesses( - accessed_vars, accessor_map, target.id): + target, tgt_dir, src_dir, src_base_var_to_accessor_map): yield dep - def get_conflicting_accesses( - self, accessed_vars, var_to_accessor_map, target): + def get_conflicting_accesses(self, target, tgt_dir, src_dir, + src_base_var_to_accessor_map): + + def get_written_names(insn): + return set(insn.assignee_var_names()) & self.relevant_vars + + def get_accessed_names(insn): + return insn.dependency_names() & self.relevant_vars + + dir_to_getter = {"w": get_written_names, "any": get_accessed_names} + + def filter_var_set_for_base_storage(var_name_set, base_storage_name): + return set( + name + for name in var_name_set + if (self.temp_to_base_storage.get(name, name) + == base_storage_name)) + + tgt_accessed_vars = 
dir_to_getter[tgt_dir](target) + tgt_accessed_vars_base = self.map_to_base_storage(tgt_accessed_vars) + + for race_var_base in sorted(tgt_accessed_vars_base): + for source_id in sorted( + src_base_var_to_accessor_map[race_var_base]): - def determine_conflict_nature(source, target): - if (not self.reverse and source in - self.kernel.get_nosync_set(target, scope=self.var_kind)): - return None - if (self.reverse and target in - self.kernel.get_nosync_set(source, scope=self.var_kind)): - return None - return self.describe_dependency(source, target) + # {{{ no barrier if nosync - for var in sorted(accessed_vars): - for source in sorted(var_to_accessor_map[var]): - dep_descr = determine_conflict_nature(source, target) + if (not self.reverse and source_id in + self.kernel.get_nosync_set(target.id, scope=self.var_kind)): + continue + if (self.reverse and target.id in + self.kernel.get_nosync_set(source_id, scope=self.var_kind)): + continue + # }}} + + dep_descr = self.describe_dependency(source_id, target) if dep_descr is None: continue + source = self.kernel.id_to_insn[source_id] + src_race_vars = filter_var_set_for_base_storage( + dir_to_getter[src_dir](source), race_var_base) + tgt_race_vars = filter_var_set_for_base_storage( + tgt_accessed_vars, race_var_base) + + race_var = race_var_base + + # Only one (non-base_storage) race variable name: Data is not + # being passed between aliases, so we may look at indices. 
+ if src_race_vars == tgt_race_vars and len(src_race_vars) == 1: + race_var, = src_race_vars + + from loopy.symbolic import do_access_ranges_overlap_conservative + if not do_access_ranges_overlap_conservative( + self.kernel, target.id, tgt_dir, + source_id, src_dir, race_var): + continue + yield DependencyRecord( - source=self.kernel.id_to_insn[source], - target=self.kernel.id_to_insn[target], + source=source, + target=target, dep_descr=dep_descr, - variable=var, + variable=race_var, var_kind=self.var_kind) - def describe_dependency(self, source, target): + def describe_dependency(self, source_id, target): dep_descr = None - source = self.kernel.id_to_insn[source] - target = self.kernel.id_to_insn[target] + source = self.kernel.id_to_insn[source_id] if self.reverse: source, target = target, source diff --git a/test/test_loopy.py b/test/test_loopy.py index 86c8c6e43..8581ae5b8 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2368,8 +2368,9 @@ def test_nosync_option_parsing(): assert "id=insn5, no_sync_with=insn1@any" in kernel_str -def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): - from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop) +def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): + from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop, + CallKernel, ReturnFromKernel) watch_for_barrier = False seen_barrier = False loop_level = 0 @@ -2379,9 +2380,7 @@ def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): if sched_item.insn_id == id1: watch_for_barrier = True elif sched_item.insn_id == id2: - assert watch_for_barrier - assert seen_barrier - return + return watch_for_barrier and seen_barrier elif isinstance(sched_item, Barrier): if watch_for_barrier and loop_level not in ignore_barriers_in_levels: seen_barrier = True @@ -2389,6 +2388,11 @@ def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): loop_level += 1 elif isinstance(sched_item, 
LeaveLoop): loop_level -= 1 + elif isinstance(sched_item, (CallKernel, ReturnFromKernel)): + pass + else: + raise RuntimeError("schedule item type '%s' not understood" + % type(sched_item).__name__) raise RuntimeError("id2 was not seen") @@ -2415,9 +2419,9 @@ def test_barrier_insertion_near_top_of_loop(): print(knl) - assert_barrier_between(knl, "ainit", "tcomp") - assert_barrier_between(knl, "tcomp", "bcomp1") - assert_barrier_between(knl, "bcomp1", "bcomp2") + assert barrier_between(knl, "ainit", "tcomp") + assert barrier_between(knl, "tcomp", "bcomp1") + assert barrier_between(knl, "bcomp1", "bcomp2") def test_barrier_insertion_near_bottom_of_loop(): @@ -2442,8 +2446,8 @@ def test_barrier_insertion_near_bottom_of_loop(): print(knl) - assert_barrier_between(knl, "bcomp1", "bcomp2") - assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) + assert barrier_between(knl, "bcomp1", "bcomp2") + assert barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) def test_barrier_in_overridden_get_grid_size_expanded_kernel(): @@ -2827,6 +2831,31 @@ def test_check_for_variable_access_ordering_with_aliasing(): lp.get_one_scheduled_kernel(knl) +@pytest.mark.parametrize(("second_index", "expect_barrier"), + [ + ("2*i", True), + ("2*i+1", False), + ]) +def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): + knl = lp.make_kernel( + "{[i]: 0<=i<128}", + """ + a[2*i] = 12 {id=first} + a[%s] = 13 {id=second,dep=first} + """ % second_index, + [ + lp.TemporaryVariable("a", lp.auto, shape=(256,), + scope=lp.temp_var_scope.LOCAL), + ]) + + knl = lp.tag_inames(knl, "i:l.0") + + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) + + assert barrier_between(knl, "first", "second") == expect_barrier + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 498f8eb0a5e682070cb5a6cb3a4847bbdedb593b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 18:10:39 -0600 
Subject: [PATCH 36/40] Fix no_sync_with_docs --- loopy/kernel/instruction.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index a2991c47b..6510da4da 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -91,7 +91,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: no_sync_with - a :class:`frozenset` of tuples of the form `(insn_id, scope)`, where + a :class:`frozenset` of tuples of the form ``(insn_id, scope)``, where `insn_id` refers to :attr:`id` of :class:`Instruction` instances and `scope` is one of the following strings: @@ -99,11 +99,12 @@ class InstructionBase(ImmutableRecord): - `"global"` - `"any"`. - This indicates (symmetrically) that program semantics are not affected by - execution ordering of the involved instructions. In particular, - no barrier synchronization will be considered necessary with the given - instruction using barriers of type `scope`, even given the existence of - a dependency chain and apparently conflicting access. + An element ``(insn_id, scope)`` means "no conflicting variable access + for variables of ``scope`` between this instruction and ``insn_id``. + Specifically, loopy will not complain even if it detects that accesses + potentially requiring ordering (e.g. by dependencies) exist, and it + will not emit barriers to guard any dependencies from this + instruction on ``insn_id`` that may exist. Note, that :attr:`no_sync_with` allows instruction matching through wildcards and match expression, just like :attr:`depends_on`. 
-- GitLab From 8ec4984d771d52b50b741f1758d1b2994fcbe6d1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 18:28:08 -0600 Subject: [PATCH 37/40] get_access_range: Be more robust/consistent about catching and reporting errors --- loopy/check.py | 10 ++++------ loopy/symbolic.py | 26 ++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 3db049616..83f529206 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -279,7 +279,8 @@ class _AccessCheckMapper(WalkMapper): if not isinstance(subscript, tuple): subscript = (subscript,) - from loopy.symbolic import get_dependencies, get_access_range + from loopy.symbolic import (get_dependencies, get_access_range, + UnableToDetermineAccessRange) available_vars = set(self.domain.get_var_dict()) shape_deps = set() @@ -300,11 +301,8 @@ class _AccessCheckMapper(WalkMapper): try: access_range = get_access_range(self.domain, subscript, self.kernel.assumptions) - except isl.Error: - # Likely: index was non-linear, nothing we can do. - return - except TypeError: - # Likely: index was non-linear, nothing we can do. + except UnableToDetermineAccessRange: + # Likely: index was non-affine, nothing we can do. 
return shape_domain = isl.BasicSet.universe(access_range.get_space()) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 272a7f45b..aa9a08844 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1537,6 +1537,10 @@ class PrimeAdder(IdentityMapper): # {{{ get access range +class UnableToDetermineAccessRange(Exception): + pass + + def get_access_range(domain, subscript, assumptions): domain, assumptions = isl.align_two(domain, assumptions) @@ -1558,8 +1562,17 @@ def get_access_range(domain, subscript, assumptions): access_map = access_map.insert_dims(dim_type.set, dn, dims) for idim in range(dims): - idx_aff = aff_from_expr(access_map.get_space(), - subscript[idim]) + sub_idim = subscript[idim] + with isl.SuppressedWarnings(domain.get_ctx()): + try: + idx_aff = aff_from_expr(access_map.get_space(), sub_idim) + except TypeError as e: + raise UnableToDetermineAccessRange( + "%s: %s" % (type(e).__name__, str(e))) + except isl.Error as e: + raise UnableToDetermineAccessRange( + "%s: %s" % (type(e).__name__, str(e))) + idx_aff = idx_aff.set_coefficient_val( dim_type.in_, dn+idim, -1) @@ -1604,7 +1617,12 @@ class BatchedAccessRangeMapper(WalkMapper): self.bad_subscripts[arg_name].append(expr) return - access_range = get_access_range(domain, subscript, self.kernel.assumptions) + try: + access_range = get_access_range( + domain, subscript, self.kernel.assumptions) + except UnableToDetermineAccessRange: + self.bad_subscripts[arg_name].append(expr) + return if self.access_ranges[arg_name] is None: self.access_ranges[arg_name] = access_range @@ -1652,7 +1670,7 @@ class AccessRangeMapper(object): # }}} -# {{{ do_access_ranges_overlap +# {{{ do_access_ranges_overlap_conservative def _get_access_range_conservative(kernel, insn_id, access_dir, var_name): insn = kernel.id_to_insn[insn_id] -- GitLab From 2ee1d076305cff44a7d95d56e38ad42ddceeaa38 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 21:26:46 -0600 Subject: [PATCH 38/40] Typo: 
overlpa->overlap --- loopy/symbolic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index aa9a08844..242ba6ab7 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1712,7 +1712,7 @@ def _get_access_range_conservative(kernel, insn_id, access_dir, var_name): def do_access_ranges_overlap_conservative( kernel, insn1_id, insn1_dir, insn2_id, insn2_dir, var_name): """Determine whether the access ranges to *var_name* in the two - given instructions overlpa. This determination is made 'conservatively', + given instructions overlap. This determination is made 'conservatively', i.e. if precise information is unavailable, it is concluded that the ranges overlap. -- GitLab From c08262639adb1780d6cf5bd26e8b0ff0552c2bcf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 21:29:01 -0600 Subject: [PATCH 39/40] Fix no_sync_with doc indentation. Plus minor phrasing change --- loopy/kernel/instruction.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 6510da4da..95001c78b 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -99,12 +99,13 @@ class InstructionBase(ImmutableRecord): - `"global"` - `"any"`. - An element ``(insn_id, scope)`` means "no conflicting variable access - for variables of ``scope`` between this instruction and ``insn_id``. - Specifically, loopy will not complain even if it detects that accesses - potentially requiring ordering (e.g. by dependencies) exist, and it - will not emit barriers to guard any dependencies from this - instruction on ``insn_id`` that may exist. + An element ``(insn_id, scope)`` means "do not consider any variable + access conflicting for variables of ``scope`` between this instruction + and ``insn_id``". + Specifically, loopy will not complain even if it detects that accesses + potentially requiring ordering (e.g. 
by dependencies) exist, and it + will not emit barriers to guard any dependencies from this + instruction on ``insn_id`` that may exist. Note, that :attr:`no_sync_with` allows instruction matching through wildcards and match expression, just like :attr:`depends_on`. -- GitLab From b9fc7cc0e690ddc6925ca005ea6ef0a62195c337 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 21:30:12 -0600 Subject: [PATCH 40/40] Remove extraneous, commented out check disabling in test_apps --- test/test_apps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_apps.py b/test/test_apps.py index ff30e3e7a..12b59e18a 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -494,7 +494,6 @@ def test_lbm(ctx_factory): end """) - #knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") knl = lp.add_and_infer_dtypes(knl, {"f": np.float32}) ref_knl = knl -- GitLab