diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 34f58e2864b51db9ffa1f2c0657d8fc4e406931f..b672f0227b1b8ba931b844b80a24b75c9625286d 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -608,6 +608,21 @@ class ArrayBase(ImmutableRecord): to generate more informative names than could be achieved by axis numbers. + .. attribute:: alignment + + Memory alignment of the array in bytes. For temporary arrays, + this ensures they are allocated with this alignment. For arguments, + this entails a promise that the incoming array obeys this alignment + restriction. + + Defaults to *None*. + + If an integer N is given, the array would be declared + with ``__attribute__((aligned(N)))`` in code generation for + :class:`loopy.CTarget`. + + .. versionadded:: 2018.1 + .. automethod:: __init__ .. automethod:: __eq__ .. automethod:: num_user_axes @@ -624,7 +639,7 @@ class ArrayBase(ImmutableRecord): def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, dim_names=None, strides=None, order=None, for_atomic=False, - target=None, + target=None, alignment=None, **kwargs): """ All of the following (except *name*) are optional. @@ -662,6 +677,7 @@ class ArrayBase(ImmutableRecord): Whether the array is declared for atomic access, and, if necessary, using atomic-capable data types. :arg offset: (See :attr:`offset`) + :arg alignment: memory alignment in bytes """ @@ -816,6 +832,7 @@ class ArrayBase(ImmutableRecord): offset=offset, dim_names=dim_names, order=order, + alignment=alignment, **kwargs) def __eq__(self, other): diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index f4b124303cceb773a6593b3b9350723842e8476d..177daa02948b9c07ef1d9856dc04019e69e24897 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -709,6 +709,10 @@ class CASTBuilder(ASTBuilderBase): ecm(p.flattened_product(decl_info.shape), prec=PREC_NONE, type_context="i")) + if temp_var.alignment: + from cgen import AlignedAttribute + temp_var_decl = AlignedAttribute(temp_var.alignment, temp_var_decl) + return temp_var_decl def wrap_temporary_decl(self, decl, scope): diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 9955705a2fbca26d12dad95b45d2f547ad778568..744c03d8ed091bc0f05e4fc41aa14e88ec89276a 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -61,6 +61,11 @@ def adjust_local_temp_var_storage(kernel, device): temp_var.copy(storage_shape=temp_var.shape) continue + if not temp_var.shape: + # scalar, no need to mess with storage shape + new_temp_vars[temp_var.name] = temp_var + continue + other_loctemp_nbytes = [ tv.nbytes for tv in six.itervalues(kernel.temporary_variables) diff --git a/loopy/version.py b/loopy/version.py index 888fb95f9d28b04692a9e86865a23610e7bd9f5c..7141a678297ded5e0d6e2f16f065f035a034d540 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v75-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v76-islpy%s" % _islpy_version