From fd727dd554251761b830c1a09f3df1b5b98c9df3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 2 Nov 2016 14:41:57 -0500 Subject: [PATCH 1/6] Improve kernel printing on terminals with black backgrounds --- loopy/kernel/__init__.py | 2 +- loopy/schedule/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index e0ec0df31..c26dc28a5 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1200,7 +1200,7 @@ class LoopKernel(RecordWithoutPickling): if lhs: core = "%s <- %s" % ( - Fore.BLUE+lhs+Style.RESET_ALL, + Fore.CYAN+lhs+Style.RESET_ALL, Fore.MAGENTA+rhs+Style.RESET_ALL, ) else: diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index d6eb1a32c..ae9ebacf5 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -372,7 +372,7 @@ def format_insn(kernel, insn_id): Style = kernel.options._style return "[%s] %s%s%s <- %s%s%s" % ( format_insn_id(kernel, insn_id), - Fore.BLUE, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, + Fore.CYAN, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, Fore.MAGENTA, str(insn.expression), Style.RESET_ALL) -- GitLab From 42e078f71d9d95a13f02a4d5caf9329de8c697bb Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 2 Nov 2016 14:42:21 -0500 Subject: [PATCH 2/6] Fix warning logic for non-const lmem use --- loopy/target/pyopencl.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index bdd5773b3..b245c44d3 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -35,7 +35,7 @@ from loopy.kernel.data import CallMangleInfo from loopy.target.opencl import OpenCLTarget, OpenCLCASTBuilder from loopy.target.python import PythonASTBuilderBase from loopy.types import NumpyType -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, warn_with_kernel from warnings import warn import logging @@ -172,9 +172,17 @@ def check_sizes(kernel, device): if product(llens) > device.max_work_group_size: raise LoopyError("work group too big") + local_mem_use = kernel.local_mem_use() + from pyopencl.characterize import usable_local_mem_size - if kernel.local_mem_use() > usable_local_mem_size(device): - raise LoopyError("using too much local memory") + import numbers + if isinstance(local_mem_use, numbers.Integral): + if local_mem_use > usable_local_mem_size(device): + raise LoopyError("using too much local memory") + else: + warn_with_kernel(kernel, "non_constant_local_mem", + "The amount of local memory used by the kernel " + "is not a constant. This will likely cause problems.") from loopy.kernel.data import ConstantArg const_arg_count = sum( -- GitLab From 56e7f6136d7ce51721f225a69de849c90e2d4862 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 2 Nov 2016 14:42:49 -0500 Subject: [PATCH 3/6] Temporary scope finding: don't abort early --- loopy/preprocess.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 09437f213..48bfbba3d 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -405,7 +405,6 @@ def find_temporary_scope(kernel): # parallel inames of that kind: and bool(cpin)): desired_scope = max(desired_scope, scope) - break desired_scope_per_insn.append(desired_scope) -- GitLab From bf82f4203547d28431fc2a6f056dc2542f2cd919 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 2 Nov 2016 14:59:25 -0500 Subject: [PATCH 4/6] Fix stats test to avoid global temporary --- test/test_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_statistics.py b/test/test_statistics.py index 4bcacf59e..68be5b8a2 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -538,7 +538,7 @@ def test_barrier_counter_barriers(): name="weird2", ) knl = lp.add_and_infer_dtypes(knl, dict(a=np.int32)) - knl = lp.split_iname(knl, "k", 128, outer_tag="g.0", inner_tag="l.0") + knl = lp.split_iname(knl, "k", 128, inner_tag="l.0") poly = lp.get_synchronization_poly(knl) print(poly) n = 512 -- GitLab From 2c77b30a33e6f09436e24d096b6866970d18ad04 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 2 Nov 2016 17:34:10 -0500 Subject: [PATCH 5/6] More fixes to temp var scope finding --- loopy/preprocess.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 48bfbba3d..c0f42e55a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -315,7 +315,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): def find_temporary_scope(kernel): - logger.debug("%s: mark local temporaries" % kernel.name) + logger.debug("%s: find temporary scope" % kernel.name) new_temp_vars = {} from loopy.kernel.data import (LocalIndexTagBase, GroupIndexTag, @@ -388,8 +388,10 @@ def find_temporary_scope(kernel): grpparallel_compute_inames, temp_var_scope.GLOBAL), ]: - if (apin != cpin and bool(locparallel_assignee_inames)): - warn_with_kernel(kernel, "write_race_local(%s)" % insn_id, + if (apin != cpin and bool(apin)): + warn_with_kernel( + kernel, + "write_race_%s(%s)" % (scope_descr, insn_id), "instruction '%s' looks invalid: " "it assigns to indices based on %s IDs, but " "its temporary '%s' cannot be made %s because " -- GitLab From cf1e633566b8ab627509c7e575fdece060d718ed Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 2 Nov 2016 17:34:44 -0500 Subject: [PATCH 6/6] Adjust doctest for changes to temp var scope finding --- doc/tutorial.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 87daa9fc4..fd884f194 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -923,6 +923,7 @@ Consider the following example: ... out[16*i_outer + i_inner] = sum(k, a_temp[k]) ... """) >>> knl = lp.tag_inames(knl, dict(i_outer="g.0", i_inner="l.0")) + >>> knl = lp.set_temporary_scope(knl, "a_temp", "local") >>> knl = lp.set_options(knl, "write_cl") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) @@ -1479,7 +1480,7 @@ Now to make things more interesting, we'll create a kernel with barriers: ... "..." ... ]) >>> knl = lp.add_and_infer_dtypes(knl, dict(a=np.int32)) - >>> knl = lp.split_iname(knl, "k", 128, outer_tag="g.0", inner_tag="l.0") + >>> knl = lp.split_iname(knl, "k", 128, inner_tag="l.0") >>> code, _ = lp.generate_code(lp.preprocess_kernel(knl)) >>> print(code) #define lid(N) ((int) get_local_id(N)) @@ -1488,6 +1489,8 @@ Now to make things more interesting, we'll create a kernel with barriers: __kernel void __attribute__ ((reqd_work_group_size(97, 1, 1))) loopy_kernel(__global int const *__restrict__ a, __global int *__restrict__ e) { __local int c[50 * 10 * 99]; + + int const k_outer = 0; for (int j = 0; j <= 9; ++j) for (int i = 0; i <= 49; ++i) -- GitLab