From 90a86acdf8231f01cf1bc57e0f9fec1b02350a02 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 18 Jan 2021 23:47:14 -0600 Subject: [PATCH 1/8] Use PyOpenCL 2021.1's _set_arg*multi interface for CL arg passing --- loopy/target/pyopencl.py | 74 +++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 8d0c309b0..56d3eedb5 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -292,6 +292,12 @@ class PyOpenCLTarget(OpenCLTarget): super().__init__( atomics_flavor=atomics_flavor) + import pyopencl.version + if pyopencl.version.VERSION < (2021, 1): + raise RuntimeError("The version of loopy you have installed " + "generates invoker code that requires PyOpenCL 2021.1 " + "or newer.") + self.device = device self.pyopencl_module_name = pyopencl_module_name @@ -490,6 +496,9 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): result = [] gen = result.append + cl_arg_indices = [] + cl_args = [] + for arg_idx, idi in enumerate(implemented_data_info): arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx @@ -501,16 +510,15 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): continue - gen(Comment("{{{ process %s" % idi.name)) - gen(Line()) - if not options.skip_arg_checks: gen(If("%s is None" % idi.name, Raise('RuntimeError("input argument \'{name}\' ' 'must be supplied")'.format(name=idi.name)))) if idi.dtype.is_composite(): - gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name))) + cl_arg_indices.append(cl_arg_idx) + cl_args.append(f"{idi.name}") + cl_arg_idx += 1 elif idi.dtype.is_complex(): @@ -535,32 +543,18 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): if (work_around_arg_count_bug and dtype.numpy_dtype == np.complex128 and fp_arg_count + 2 <= 8): - gen(Assign( - "_lpy_buf", - "_lpy_pack('{arg_char}', {arg_var}.real)" - .format(arg_char=arg_char, arg_var=idi.name))) - gen(S( - 
"_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)" - .format(cl_arg_idx=cl_arg_idx))) + cl_arg_indices.append(cl_arg_idx) + cl_args.append(f"_lpy_pack('{arg_char}', {idi.name}.real)") cl_arg_idx += 1 - gen(Assign( - "_lpy_buf", - "_lpy_pack('{arg_char}', {arg_var}.imag)" - .format(arg_char=arg_char, arg_var=idi.name))) - gen(S( - "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)" - .format(cl_arg_idx=cl_arg_idx))) + cl_arg_indices.append(cl_arg_idx) + cl_args.append(f"_lpy_pack('{arg_char}', {idi.name}.imag)") cl_arg_idx += 1 else: - gen(Assign( - "_lpy_buf", - "_lpy_pack('{arg_char}{arg_char}', " - "{arg_var}.real, {arg_var}.imag)" - .format(arg_char=arg_char, arg_var=idi.name))) - gen(S( - "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)" - .format(cl_arg_idx=cl_arg_idx))) + cl_arg_indices.append(cl_arg_idx) + cl_args.append( + f"_lpy_pack('{arg_char}{arg_char}', " + f"{idi.name}.real, {idi.name}.imag)") cl_arg_idx += 1 fp_arg_count += 2 @@ -569,9 +563,8 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): if idi.dtype.dtype.kind == "f": fp_arg_count += 1 - gen(S( - "_lpy_knl._set_arg_buf(%d, _lpy_pack('%s', %s))" - % (cl_arg_idx, idi.dtype.dtype.char, idi.name))) + cl_arg_indices.append(cl_arg_idx) + cl_args.append(f"_lpy_pack('{idi.dtype.dtype.char}', {idi.name})") cl_arg_idx += 1 @@ -579,10 +572,14 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): raise LoopyError("do not know how to pass argument of type '%s'" % idi.dtype) - gen(Line()) + if cl_arg_indices: + assert len(cl_arg_indices) == len(cl_args) - gen(Comment("}}}")) gen(Line()) + gen(S(f"_lpy_knl._set_arg_buf_multi(" + f"({', '.join(str(i) for i in cl_arg_indices)},)," + f"({', '.join(cl_args)},)" + ")")) return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx @@ -596,13 +593,20 @@ def generate_array_arg_setup(kernel, implemented_data_info, arg_idx_to_cl_arg_id result = [] gen = result.append + cl_arg_indices = [] + cl_args = [] for arg_idx, arg in enumerate(implemented_data_info): 
- if not issubclass(arg.arg_class, ArrayBase): - continue + if issubclass(arg.arg_class, ArrayBase): + cl_arg_indices.append(arg_idx_to_cl_arg_idx[arg_idx]) + cl_args.append(arg.name) - cl_arg_idx = arg_idx_to_cl_arg_idx[arg_idx] + if cl_arg_indices: + assert len(cl_arg_indices) == len(cl_args) - gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, arg.name))) + gen(S(f"_lpy_knl._set_arg_multi(" + f"({', '.join(str(i) for i in cl_arg_indices)},)," + f"({', '.join(cl_args)},)" + ")")) return Suite(result) -- GitLab From 8336738c7bbe49c09d8b523d6a4b20f8342c8f18 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 18 Jan 2021 23:48:50 -0600 Subject: [PATCH 2/8] Point requirements.txt at speed-up-enqeue for pyopencl --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8016ee7a8..043dafe57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ git+https://github.com/inducer/pytools.git#egg=pytools >= 2021.1 git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/cgen.git#egg=cgen -git+https://github.com/inducer/pyopencl.git#egg=pyopencl +git+https://github.com/inducer/pyopencl.git@speed-up-enqeue#egg=pyopencl git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/genpy.git#egg=genpy git+https://github.com/inducer/codepy.git#egg=codepy -- GitLab From 1938ee66dea2eb8b62908f292dc0bdceecf61190 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 18 Jan 2021 23:52:33 -0600 Subject: [PATCH 3/8] Placate flake8 --- loopy/target/pyopencl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 56d3eedb5..4d66aad82 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -490,8 +490,7 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): fp_arg_count = 0 - from genpy import ( - Comment, Line, If, Raise, Assign, Statement 
as S, Suite) + from genpy import Line, If, Raise, Statement as S, Suite result = [] gen = result.append -- GitLab From cee60272c91e922d5e14d5e090c31d60b150b025 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jan 2021 12:17:40 -0600 Subject: [PATCH 4/8] Track pyopencl change: set_arg*multi uses only a single tuple --- loopy/target/pyopencl.py | 47 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 4d66aad82..4936c634d 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -495,8 +495,7 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): result = [] gen = result.append - cl_arg_indices = [] - cl_args = [] + cl_indices_and_args = [] for arg_idx, idi in enumerate(implemented_data_info): arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx @@ -515,8 +514,8 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): 'must be supplied")'.format(name=idi.name)))) if idi.dtype.is_composite(): - cl_arg_indices.append(cl_arg_idx) - cl_args.append(f"{idi.name}") + cl_indices_and_args.append(cl_arg_idx) + cl_indices_and_args.append(f"{idi.name}") cl_arg_idx += 1 @@ -542,16 +541,18 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): if (work_around_arg_count_bug and dtype.numpy_dtype == np.complex128 and fp_arg_count + 2 <= 8): - cl_arg_indices.append(cl_arg_idx) - cl_args.append(f"_lpy_pack('{arg_char}', {idi.name}.real)") + cl_indices_and_args.append(cl_arg_idx) + cl_indices_and_args.append( + f"_lpy_pack('{arg_char}', {idi.name}.real)") cl_arg_idx += 1 - cl_arg_indices.append(cl_arg_idx) - cl_args.append(f"_lpy_pack('{arg_char}', {idi.name}.imag)") + cl_indices_and_args.append(cl_arg_idx) + cl_indices_and_args.append( + f"_lpy_pack('{arg_char}', {idi.name}.imag)") cl_arg_idx += 1 else: - cl_arg_indices.append(cl_arg_idx) - cl_args.append( + cl_indices_and_args.append(cl_arg_idx) + 
cl_indices_and_args.append( f"_lpy_pack('{arg_char}{arg_char}', " f"{idi.name}.real, {idi.name}.imag)") cl_arg_idx += 1 @@ -562,8 +563,9 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): if idi.dtype.dtype.kind == "f": fp_arg_count += 1 - cl_arg_indices.append(cl_arg_idx) - cl_args.append(f"_lpy_pack('{idi.dtype.dtype.char}', {idi.name})") + cl_indices_and_args.append(cl_arg_idx) + cl_indices_and_args.append( + f"_lpy_pack('{idi.dtype.dtype.char}', {idi.name})") cl_arg_idx += 1 @@ -571,13 +573,12 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): raise LoopyError("do not know how to pass argument of type '%s'" % idi.dtype) - if cl_arg_indices: - assert len(cl_arg_indices) == len(cl_args) + if cl_indices_and_args: + assert len(cl_indices_and_args) % 2 == 0 gen(Line()) gen(S(f"_lpy_knl._set_arg_buf_multi(" - f"({', '.join(str(i) for i in cl_arg_indices)},)," - f"({', '.join(cl_args)},)" + f"({', '.join(str(i) for i in cl_indices_and_args)},)" ")")) return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx @@ -592,19 +593,17 @@ def generate_array_arg_setup(kernel, implemented_data_info, arg_idx_to_cl_arg_id result = [] gen = result.append - cl_arg_indices = [] - cl_args = [] + cl_indices_and_args = [] for arg_idx, arg in enumerate(implemented_data_info): if issubclass(arg.arg_class, ArrayBase): - cl_arg_indices.append(arg_idx_to_cl_arg_idx[arg_idx]) - cl_args.append(arg.name) + cl_indices_and_args.append(arg_idx_to_cl_arg_idx[arg_idx]) + cl_indices_and_args.append(arg.name) - if cl_arg_indices: - assert len(cl_arg_indices) == len(cl_args) + if cl_indices_and_args: + assert len(cl_indices_and_args) % 2 == 0 gen(S(f"_lpy_knl._set_arg_multi(" - f"({', '.join(str(i) for i in cl_arg_indices)},)," - f"({', '.join(cl_args)},)" + f"({', '.join(str(i) for i in cl_indices_and_args)},)" ")")) return Suite(result) -- GitLab From 58cd039aa52f961e68defe45bfd1685a924fec1c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jan 
2021 19:01:34 -0600 Subject: [PATCH 5/8] Make use of Kernel._set_arg_buf_pack_multi in pyopencl kernel invoke --- loopy/target/pyopencl.py | 53 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 4936c634d..c2b7ae5f8 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -490,12 +490,24 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): fp_arg_count = 0 - from genpy import Line, If, Raise, Statement as S, Suite + from genpy import If, Raise, Statement as S, Suite result = [] gen = result.append - cl_indices_and_args = [] + buf_indices_and_args = [] + buf_pack_indices_and_args = [] + + from pyopencl.invoker import BUF_PACK_TYPECHARS + + def add_buf_arg(arg_idx, typechar, expr_str): + if typechar in BUF_PACK_TYPECHARS: + buf_pack_indices_and_args.append(cl_arg_idx) + buf_pack_indices_and_args.append(repr(typechar.encode())) + buf_pack_indices_and_args.append(expr_str) + else: + buf_indices_and_args.append(cl_arg_idx) + buf_indices_and_args.append(f"pack('{typechar}', {expr_str})") for arg_idx, idi in enumerate(implemented_data_info): arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx @@ -514,8 +526,8 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): 'must be supplied")'.format(name=idi.name)))) if idi.dtype.is_composite(): - cl_indices_and_args.append(cl_arg_idx) - cl_indices_and_args.append(f"{idi.name}") + buf_indices_and_args.append(cl_arg_idx) + buf_indices_and_args.append(f"{idi.name}") cl_arg_idx += 1 @@ -541,18 +553,14 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): if (work_around_arg_count_bug and dtype.numpy_dtype == np.complex128 and fp_arg_count + 2 <= 8): - cl_indices_and_args.append(cl_arg_idx) - cl_indices_and_args.append( - f"_lpy_pack('{arg_char}', {idi.name}.real)") + add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.real") cl_arg_idx += 1 - 
cl_indices_and_args.append(cl_arg_idx) - cl_indices_and_args.append( - f"_lpy_pack('{arg_char}', {idi.name}.imag)") + add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.imag") cl_arg_idx += 1 else: - cl_indices_and_args.append(cl_arg_idx) - cl_indices_and_args.append( + buf_indices_and_args.append(cl_arg_idx) + buf_indices_and_args.append( f"_lpy_pack('{arg_char}{arg_char}', " f"{idi.name}.real, {idi.name}.imag)") cl_arg_idx += 1 @@ -563,23 +571,22 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): if idi.dtype.dtype.kind == "f": fp_arg_count += 1 - cl_indices_and_args.append(cl_arg_idx) - cl_indices_and_args.append( - f"_lpy_pack('{idi.dtype.dtype.char}', {idi.name})") - + add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name) cl_arg_idx += 1 else: raise LoopyError("do not know how to pass argument of type '%s'" % idi.dtype) - if cl_indices_and_args: - assert len(cl_indices_and_args) % 2 == 0 - - gen(Line()) - gen(S(f"_lpy_knl._set_arg_buf_multi(" - f"({', '.join(str(i) for i in cl_indices_and_args)},)" - ")")) + for arg_kind, args_and_indices, entry_length in [ + ("_buf", buf_indices_and_args, 2), + ("_buf_pack", buf_pack_indices_and_args, 3), + ]: + assert len(args_and_indices) % 2 == 0 + if args_and_indices: + gen(S(f"_lpy_knl._set_arg{arg_kind}_multi(" + f"({', '.join(str(i) for i in args_and_indices)},), " + ")")) return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx -- GitLab From e7fae1568ababd87e75a93b01351c3537d99f2c6 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jan 2021 19:15:48 -0600 Subject: [PATCH 6/8] Fix length assert on args_and_indices in invoker generation --- loopy/target/pyopencl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index c2b7ae5f8..28771d1d3 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -582,7 +582,7 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): ("_buf", 
buf_indices_and_args, 2), ("_buf_pack", buf_pack_indices_and_args, 3), ]: - assert len(args_and_indices) % 2 == 0 + assert len(args_and_indices) % entry_length == 0 if args_and_indices: gen(S(f"_lpy_knl._set_arg{arg_kind}_multi(" f"({', '.join(str(i) for i in args_and_indices)},), " -- GitLab From 3baf01bec6fc1d229d63142b6aca645ab57b782a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jan 2021 23:56:16 -0600 Subject: [PATCH 7/8] Fix scope leakage in add_buf_arg --- loopy/target/pyopencl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 28771d1d3..df7f65868 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -502,11 +502,11 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): def add_buf_arg(arg_idx, typechar, expr_str): if typechar in BUF_PACK_TYPECHARS: - buf_pack_indices_and_args.append(cl_arg_idx) + buf_pack_indices_and_args.append(arg_idx) buf_pack_indices_and_args.append(repr(typechar.encode())) buf_pack_indices_and_args.append(expr_str) else: - buf_indices_and_args.append(cl_arg_idx) + buf_indices_and_args.append(arg_idx) buf_indices_and_args.append(f"pack('{typechar}', {expr_str})") for arg_idx, idi in enumerate(implemented_data_info): -- GitLab From 64e1802d818b5902f6ebfa0248c2427e1145d8b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Wed, 20 Jan 2021 19:28:52 -0600 Subject: [PATCH 8/8] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 043dafe57..8016ee7a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ git+https://github.com/inducer/pytools.git#egg=pytools >= 2021.1 git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/cgen.git#egg=cgen -git+https://github.com/inducer/pyopencl.git@speed-up-enqeue#egg=pyopencl 
+git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/genpy.git#egg=genpy git+https://github.com/inducer/codepy.git#egg=codepy -- GitLab