diff --git a/loopy/target/execution.py b/loopy/target/execution.py index d594f3e10a9fef7e3e16a2848d68eca7f29ebfc9..da5a54e6b4eafe39117b8b01bba2fdf570f3be8b 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -696,7 +696,7 @@ typed_and_scheduled_cache = WriteOncePersistentDict( invoker_cache = WriteOncePersistentDict( - "loopy-invoker-cache-v1-"+DATA_MODEL_VERSION, + "loopy-invoker-cache-v10-"+DATA_MODEL_VERSION, key_builder=LoopyKeyBuilder()) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index bb04ddc3d8f4b01b859242766bc0230c68dccb23..f9b37b53f6cbedc22bb319952bb05357ef980b54 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -493,9 +493,9 @@ class PyOpenCLTarget(OpenCLTarget): use_int8_for_bool=use_int8_for_bool) import pyopencl.version - if pyopencl.version.VERSION < (2021, 1): + if pyopencl.version.VERSION < (2021, 2): raise RuntimeError("The version of loopy you have installed " - "generates invoker code that requires PyOpenCL 2021.1 " + "generates invoker code that requires PyOpenCL 2021.2 " "or newer.") self.device = device diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index cdee5600bb5dd0dce3a3971583604f737c6913d9..d23301077f1f14f85f93074f067b09acf1faa95a 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -48,6 +48,8 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): "out_host=None" ] super().__init__(system_args) + from pytools import UniqueNameGenerator + self.dtype_name_generator = UniqueNameGenerator(forced_prefix="_lpy_dtype_") def python_dtype_str(self, dtype): import pyopencl.tools as cl_tools @@ -90,47 +92,46 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): from pymbolic import var num_axes = len(arg.strides) - for i in range(num_axes): - gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) itemsize = kernel_arg.dtype.numpy_dtype.itemsize for i in range(num_axes): - gen("_lpy_strides_%d = %s" % (i, strify( - itemsize*arg.unvec_strides[i]))) + gen("_lpy_ustrides_%d = %s" % (i, strify( + arg.unvec_strides[i]))) if not skip_arg_checks: for i in range(num_axes): - gen("assert _lpy_strides_%d > 0, " + gen("assert _lpy_ustrides_%d > 0, " "\"'%s' has negative stride in axis %d\"" % (i, arg.name, i)) - sym_strides = tuple( - var("_lpy_strides_%d" % i) + sym_ustrides = tuple( + var("_lpy_ustrides_%d" % i) for i in range(num_axes)) sym_shape = tuple( - var("_lpy_shape_%d" % i) + arg.unvec_shape[i] for i in range(num_axes)) - alloc_size_expr = (sum(astrd*(alen-1) - for alen, astrd in zip(sym_shape, sym_strides)) - + itemsize) + size_expr = (sum(astrd*(alen-1) + for alen, astrd in zip(sym_shape, sym_ustrides)) + + 1) - gen("_lpy_alloc_size = %s" % strify(alloc_size_expr)) - gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, " - "%(dtype)s, strides=%(strides)s, " - "data=allocator(_lpy_alloc_size), allocator=allocator)" - % dict( - name=arg.name, - shape=strify(sym_shape), - strides=strify(sym_strides), - dtype=self.python_dtype_str(kernel_arg.dtype.numpy_dtype))) + gen("_lpy_size = %s" % strify(size_expr)) + sym_strides = tuple(itemsize*s_i for s_i in sym_ustrides) + dtype_str = self.python_dtype_str(kernel_arg.dtype.numpy_dtype) - if not skip_arg_checks: - for i in range(num_axes): - gen("del _lpy_shape_%d" % i) - gen("del _lpy_strides_%d" % i) - gen("del _lpy_alloc_size") - gen("") + dtype_name = self.dtype_name_generator() + gen.add_to_preamble(f"{dtype_name} = _lpy_np.dtype({dtype_str})") + gen(f"{arg.name} = _lpy_cl_array.Array(None, {strify(sym_shape)}, " + f"{dtype_name}, strides={strify(sym_strides)}, " + f"data=allocator({strify(itemsize * var('_lpy_size'))}), " + "allocator=allocator, " + "_fast=True, _size=_lpy_size, " + "_context=queue.context, _queue=queue)") + + for i in range(num_axes): + gen("del _lpy_ustrides_%d" % i) + gen("del _lpy_size") + gen("") # }}}