diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 520bb8a33e10a53d5d0335eee3095a514be41818..ca71c21269add662dc1ef19a4437c9f297ec6477 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -91,8 +91,9 @@ class CTarget(TargetBase): base_storage_sizes = {} base_storage_to_is_local = {} + base_storage_to_align_bytes = {} - from cgen import ArrayOf, Pointer, Initializer + from cgen import ArrayOf, Pointer, Initializer, AlignedAttribute from loopy.codegen import POD # uses the correct complex type from cgen.opencl import CLLocal @@ -124,6 +125,16 @@ class CTarget(TargetBase): base_storage_to_is_local.setdefault(tv.base_storage, []).append( tv.is_local) + align_size = tv.dtype.itemsize + + from loopy.kernel.array import VectorArrayDimTag + for dim_tag, axis_len in zip(tv.dim_tags, tv.shape): + if isinstance(dim_tag, VectorArrayDimTag): + align_size *= axis_len + + base_storage_to_align_bytes.setdefault(tv.base_storage, []).append( + align_size) + for idi in decl_info: cast_decl = POD(self, idi.dtype, "") temp_var_decl = POD(self, idi.dtype, idi.name) @@ -162,6 +173,9 @@ class CTarget(TargetBase): bs_var_decl = ArrayOf(bs_var_decl, max(bs_sizes)) + alignment = max(base_storage_to_align_bytes[bs_name]) + bs_var_decl = AlignedAttribute(alignment, bs_var_decl) + body.append(bs_var_decl) body.extend(temp_decls) diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index ac1c71d46428c14aa1bd1c09d7da19cd0298d5cc..fb6ba114d9d906403d47b0aaf69e2fe4cef382f2 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit ac1c71d46428c14aa1bd1c09d7da19cd0298d5cc +Subproject commit fb6ba114d9d906403d47b0aaf69e2fe4cef382f2