From b66344e0cec02cbe3eb045a798cbfc88a2460467 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 24 Jan 2018 18:29:35 -0600 Subject: [PATCH 1/9] Numpy execution: Enable support for relaxed stride checks (closes #121). --- loopy/target/execution.py | 22 +++++++++++++++++----- test/test_loopy.py | 20 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 2aa76e099..facd56a07 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -363,6 +363,10 @@ class ExecutionWrapperGeneratorBase(object): from loopy.types import NumpyType gen("# {{{ set up array arguments") + + gen("") + gen("def _lpy_filter_stride(shape, stride):") + gen(" return tuple(s for dim, s in zip(shape, stride) if dim > 1)") gen("") if not options.no_numpy: @@ -516,13 +520,21 @@ class ExecutionWrapperGeneratorBase(object): itemsize = kernel_arg.dtype.numpy_dtype.itemsize sym_strides = tuple( itemsize*s_i for s_i in arg.unvec_strides) - gen("if %s.strides != %s:" - % (arg.name, strify(sym_strides))) + gen("if _lpy_filter_stride(%s.shape, %s.strides) != " + "_lpy_filter_stride(%s.shape, %s):" + % ( + arg.name, arg.name, arg.name, + strify(sym_strides))) with Indentation(gen): gen("raise TypeError(\"strides mismatch on " - "argument '%s' (got: %%s, expected: %%s)\" " - "%% (%s.strides, %s))" - % (arg.name, arg.name, strify(sym_strides))) + "argument '%s' " + "(after removing unit length dims, " + "got: %%s, expected: %%s)\" " + "%% (_lpy_filter_stride(%s.shape, %s.strides), " + "_lpy_filter_stride(%s.shape, %s)))" + % ( + arg.name, arg.name, arg.name, arg.name, + strify(sym_strides))) if not arg.allows_offset: gen("if hasattr(%s, 'offset') and %s.offset:" % ( diff --git a/test/test_loopy.py b/test/test_loopy.py index e624ed346..375b59dcb 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2746,6 +2746,26 @@ def test_arg_inference_for_predicates(): assert knl.arg_dict["incr"].shape == (10,) +def test_relaxed_stride_checks(ctx_factory): + # Check that loopy is compatible with numpy's relaxed stride rules. + ctx = ctx_factory() + + knl = lp.make_kernel("{[i,j]: 0 <= i <= n and 0 <= j <= m}", + """ + a[i] = sum(j, A[i,j] * b[j]) + """) + + with cl.CommandQueue(ctx) as queue: + A = np.zeros((1, 10), order="F") + # Force convert A to C order. numpy will preserve strides in this case. + A = np.array(A, copy=False, order="C") + b = np.zeros(10, dtype=np.float64) + + evt, (a,) = knl(queue, A=A, b=b) + + assert a == 0 + + def test_add_prefetch_works_in_lhs_index(): knl = lp.make_kernel( "{ [n,k,l,k1,l1,k2,l2]: " -- GitLab From 4bc46b9b87cd25a6cceb235fff1d0b4928af4a1b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 24 Jan 2018 18:45:54 -0600 Subject: [PATCH 2/9] Remove a no-op statement --- test/test_loopy.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 375b59dcb..0e55cdba6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2757,8 +2757,6 @@ def test_relaxed_stride_checks(ctx_factory): with cl.CommandQueue(ctx) as queue: A = np.zeros((1, 10), order="F") - # Force convert A to C order. numpy will preserve strides in this case. - A = np.array(A, copy=False, order="C") b = np.zeros(10, dtype=np.float64) evt, (a,) = knl(queue, A=A, b=b) -- GitLab From 1a9cd3b9e8b4c724776820f5ee97ec925c90d1ea Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 24 Jan 2018 18:47:30 -0600 Subject: [PATCH 3/9] flake8 fix --- test/test_loopy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 0e55cdba6..497d1e2f4 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2756,10 +2756,10 @@ def test_relaxed_stride_checks(ctx_factory): """) with cl.CommandQueue(ctx) as queue: - A = np.zeros((1, 10), order="F") + mat = np.zeros((1, 10), order="F") b = np.zeros(10, dtype=np.float64) - evt, (a,) = knl(queue, A=A, b=b) + evt, (a,) = knl(queue, A=mat, b=b) assert a == 0 -- GitLab From 315282c3d1ba153026443bc14375308b0f684f5c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 24 Jan 2018 19:07:22 -0600 Subject: [PATCH 4/9] Relax stride checks in handle_alloc() --- loopy/target/c/c_execution.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index c136a9f36..4fd248c87 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -107,7 +107,8 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): #check strides if not skip_arg_checks: - gen("assert %(strides)s == %(name)s.strides, " + gen("assert _lpy_filter_stride(%(name)s.shape, %(strides)s) " + "== _lpy_filter_stride(%(name)s.shape, %(name)s.strides), " "'Strides of loopy created array %(name)s, " "do not match expected.'" % dict(name=arg.name, -- GitLab From 24cc712e41de5222cdad29d0b2bb5909efb0252f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 24 Jan 2018 19:20:13 -0600 Subject: [PATCH 5/9] Remove an extraneous dtype --- test/test_loopy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 497d1e2f4..db18b2a21 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2757,7 +2757,7 @@ def test_relaxed_stride_checks(ctx_factory): with cl.CommandQueue(ctx) as queue: mat = np.zeros((1, 10), order="F") - b = np.zeros(10, dtype=np.float64) + b = np.zeros(10) evt, (a,) = knl(queue, A=mat, b=b) -- GitLab From fd10efe4ea012e3e459281a8d083992a657a3f3e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 25 Jan 2018 11:41:17 -0600 Subject: [PATCH 6/9] Change strides check to avoid function calls. --- loopy/target/c/c_execution.py | 14 ++++++++--- loopy/target/execution.py | 45 +++++++++++++++++++++++------------ 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 4fd248c87..bba3a8d56 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -105,13 +105,21 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): kernel_arg.dtype.numpy_dtype), order=order)) + expected_strides = tuple( + var("_lpy_expected_strides_%s" % i) + for i in range(num_axes)) + + gen("(%s,) = %s.strides" % (", ".join(expected_strides), arg.name)) + #check strides if not skip_arg_checks: - gen("assert _lpy_filter_stride(%(name)s.shape, %(strides)s) " - "== _lpy_filter_stride(%(name)s.shape, %(name)s.strides), " + strides_check_expr = self.get_strides_check_expr( + sym_shape, sym_strides, expected_strides) + gen("assert %(strides_check)s, " "'Strides of loopy created array %(name)s, " "do not match expected.'" % - dict(name=arg.name, + dict(strides_check=strides_check_expr, + name=arg.name, strides=strify(sym_strides))) for i in range(num_axes): gen("del _lpy_shape_%d" % i) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index facd56a07..18d33461c 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -351,6 +351,13 @@ class ExecutionWrapperGeneratorBase(object): def get_arg_pass(self, arg): raise NotImplementedError() + def get_strides_check_expr(self, shape, strides, sym_strides): + # Returns an expression suitable for use for checking the strides of an + # argument. + return " and ".join( + "(%s == 1 or %s == %s)" % elem + for elem in zip(shape, strides, sym_strides)) + # {{{ arg setup def generate_arg_setup( @@ -364,11 +371,6 @@ class ExecutionWrapperGeneratorBase(object): gen("# {{{ set up array arguments") - gen("") - gen("def _lpy_filter_stride(shape, stride):") - gen(" return tuple(s for dim, s in zip(shape, stride) if dim > 1)") - gen("") - if not options.no_numpy: gen("_lpy_encountered_numpy = False") gen("_lpy_encountered_dev = False") @@ -520,21 +522,34 @@ class ExecutionWrapperGeneratorBase(object): itemsize = kernel_arg.dtype.numpy_dtype.itemsize sym_strides = tuple( itemsize*s_i for s_i in arg.unvec_strides) - gen("if _lpy_filter_stride(%s.shape, %s.strides) != " - "_lpy_filter_stride(%s.shape, %s):" - % ( - arg.name, arg.name, arg.name, - strify(sym_strides))) + + ndim = len(arg.unvec_shape) + shape = ["_lpy_shape_%d" % i for i in range(ndim)] + strides = ["_lpy_stride_%d" % i for i in range(ndim)] + + gen("(%s,) = %s.shape" % (", ".join(shape), arg.name)) + gen("(%s,) = %s.strides" % (", ".join(strides), arg.name)) + + gen("if not (%s):" + % self.get_strides_check_expr( + shape, strides, + (strify(s) for s in sym_strides))) with Indentation(gen): + gen("_lpy_got = tuple(stride " + "for (dim, stride) in zip(%s.shape, %s.strides) " + "if dim > 1)" + % (arg.name, arg.name)) + gen("_lpy_expected = tuple(stride " + "for (dim, stride) in zip(%s.shape, %s) " + "if dim > 1)" + % (arg.name, strify_tuple(sym_strides))) + gen("raise TypeError(\"strides mismatch on " "argument '%s' " "(after removing unit length dims, " "got: %%s, expected: %%s)\" " - "%% (_lpy_filter_stride(%s.shape, %s.strides), " - "_lpy_filter_stride(%s.shape, %s)))" - % ( - arg.name, arg.name, arg.name, arg.name, - strify(sym_strides))) + "%% (_lpy_got, _lpy_expected))" + % arg.name) if not arg.allows_offset: gen("if hasattr(%s, 'offset') and %s.offset:" % ( -- GitLab From adfc728ec94744af3207695493a12f548f57d4a9 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 25 Jan 2018 11:44:24 -0600 Subject: [PATCH 7/9] Add back deleted empty line generator --- loopy/target/execution.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 18d33461c..a032664d5 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -370,6 +370,7 @@ class ExecutionWrapperGeneratorBase(object): from loopy.types import NumpyType gen("# {{{ set up array arguments") + gen("") if not options.no_numpy: gen("_lpy_encountered_numpy = False") -- GitLab From 4a6907f3b501949d0261421858667b69f4e7f2d0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 25 Jan 2018 11:47:16 -0600 Subject: [PATCH 8/9] Avoid extra parens --- loopy/target/execution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index a032664d5..0ac82b71d 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -531,7 +531,7 @@ class ExecutionWrapperGeneratorBase(object): gen("(%s,) = %s.shape" % (", ".join(shape), arg.name)) gen("(%s,) = %s.strides" % (", ".join(strides), arg.name)) - gen("if not (%s):" + gen("if not %s:" % self.get_strides_check_expr( shape, strides, (strify(s) for s in sym_strides))) -- GitLab From 036c90f6ecc82ad64b05ec8335bb9f2cc4e78a70 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 25 Jan 2018 12:05:32 -0600 Subject: [PATCH 9/9] Fix string/var mismatch --- loopy/target/c/c_execution.py | 6 ++++-- loopy/target/execution.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index bba3a8d56..d8b76d32a 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -109,12 +109,14 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): var("_lpy_expected_strides_%s" % i) for i in range(num_axes)) - gen("(%s,) = %s.strides" % (", ".join(expected_strides), arg.name)) + gen("%s = %s.strides" % (strify(expected_strides), arg.name)) #check strides if not skip_arg_checks: strides_check_expr = self.get_strides_check_expr( - sym_shape, sym_strides, expected_strides) + (strify(s) for s in sym_shape), + (strify(s) for s in sym_strides), + (strify(s) for s in expected_strides)) gen("assert %(strides_check)s, " "'Strides of loopy created array %(name)s, " "do not match expected.'" % diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 0ac82b71d..3a3ea0a70 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -353,7 +353,7 @@ class ExecutionWrapperGeneratorBase(object): def get_strides_check_expr(self, shape, strides, sym_strides): # Returns an expression suitable for use for checking the strides of an - # argument. + # argument. Arguments should be sequences of strings. return " and ".join( "(%s == 1 or %s == %s)" % elem for elem in zip(shape, strides, sym_strides)) -- GitLab