diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 34f58e2864b51db9ffa1f2c0657d8fc4e406931f..df50b4d33d3eac90e9f2a63e4121280a0109986d 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -624,7 +624,7 @@ class ArrayBase(ImmutableRecord):
 
     def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
-            target=None,
+            target=None, alignment=None,
             **kwargs):
         """
         All of the following (except *name*) are optional.
@@ -662,6 +662,7 @@ class ArrayBase(ImmutableRecord):
             Whether the array is declared for atomic access, and, if necessary,
             using atomic-capable data types.
         :arg offset: (See :attr:`offset`)
+        :arg alignment: memory alignment in bytes, or *None* to not request one
 
         """
 
@@ -816,6 +817,7 @@ class ArrayBase(ImmutableRecord):
                 offset=offset,
                 dim_names=dim_names,
                 order=order,
+                alignment=alignment,
                 **kwargs)
 
     def __eq__(self, other):
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index f4b124303cceb773a6593b3b9350723842e8476d..177daa02948b9c07ef1d9856dc04019e69e24897 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -709,6 +709,10 @@ class CASTBuilder(ASTBuilderBase):
                     ecm(p.flattened_product(decl_info.shape),
                         prec=PREC_NONE, type_context="i"))
 
+        if temp_var.alignment:
+            from cgen import AlignedAttribute
+            temp_var_decl = AlignedAttribute(temp_var.alignment, temp_var_decl)
+
         return temp_var_decl
 
     def wrap_temporary_decl(self, decl, scope):
diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 9955705a2fbca26d12dad95b45d2f547ad778568..744c03d8ed091bc0f05e4fc41aa14e88ec89276a 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -61,6 +61,11 @@ def adjust_local_temp_var_storage(kernel, device):
                     temp_var.copy(storage_shape=temp_var.shape)
             continue
 
+        if not temp_var.shape:
+            # scalar, no need to mess with storage shape
+            new_temp_vars[temp_var.name] = temp_var
+            continue
+
         other_loctemp_nbytes = [
                 tv.nbytes
                 for tv in six.itervalues(kernel.temporary_variables)
diff --git a/loopy/version.py b/loopy/version.py
index 888fb95f9d28b04692a9e86865a23610e7bd9f5c..7141a678297ded5e0d6e2f16f065f035a034d540 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v75-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v76-islpy%s" % _islpy_version