diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 8ebf0cd007fad808398e85d0661c7a8ed2860f19..e63ca2f63a19eefb9cdf8eca436a6fcdd555111c 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -401,7 +401,7 @@ def match_dtype_to_c_struct(device, name, dtype, context=None): src = r""" #define pycl_offsetof(st, m) \ - ((size_t) ( (char *)&((st *)0)->m - (char *)0 )) + ((size_t) ((__local char *) &(dummy.m) - (__local char *)&dummy )) %(pre_decls)s @@ -410,6 +410,7 @@ def match_dtype_to_c_struct(device, name, dtype, context=None): __kernel void get_size_and_offsets(__global size_t *result) { result[0] = sizeof(%(my_type)s); + __local %(my_type)s dummy; %(offset_code)s } """ % dict( @@ -428,9 +429,24 @@ def match_dtype_to_c_struct(device, name, dtype, context=None): import pyopencl.array result_buf = cl.array.empty(queue, 1+len(fields), np.uintp) - knl(queue, (1,), None, result_buf.data) + knl(queue, (1,), (1,), result_buf.data) + queue.finish() size_and_offsets = result_buf.get() + size = int(size_and_offsets[0]) + + from pytools import any + offsets = size_and_offsets[1:] + if any(ofs >= size for ofs in offsets): + # offsets not plausible + + if dtype.itemsize == size: + # If sizes match, use numpy's idea of the offsets. + offsets = [offset + for field_name, (field_dtype, offset) in fields] + else: + raise RuntimeError("cannot discover struct layout on '%s'" % device) + result_buf.data.release() del knl del prg @@ -440,7 +456,7 @@ def match_dtype_to_c_struct(device, name, dtype, context=None): dtype_arg_dict = dict( names=[field_name for field_name, (field_dtype, offset) in fields], formats=[field_dtype for field_name, (field_dtype, offset) in fields], - offsets=[int(x) for x in size_and_offsets[1:]], + offsets=[int(x) for x in offsets], itemsize=int(size_and_offsets[0]), ) dtype = np.dtype(dtype_arg_dict)