From d21e496345f9714c87cd59190af13f7661602201 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Thu, 20 Aug 2015 15:33:37 -0500 Subject: [PATCH] Fix alignment of aliased temporary storage --- loopy/target/c/__init__.py | 16 +++++++++++++++- loopy/target/c/compyte | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 520bb8a33..ca71c2126 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -91,8 +91,9 @@ class CTarget(TargetBase): base_storage_sizes = {} base_storage_to_is_local = {} + base_storage_to_align_bytes = {} - from cgen import ArrayOf, Pointer, Initializer + from cgen import ArrayOf, Pointer, Initializer, AlignedAttribute from loopy.codegen import POD # uses the correct complex type from cgen.opencl import CLLocal @@ -124,6 +125,16 @@ class CTarget(TargetBase): base_storage_to_is_local.setdefault(tv.base_storage, []).append( tv.is_local) + align_size = tv.dtype.itemsize + + from loopy.kernel.array import VectorArrayDimTag + for dim_tag, axis_len in zip(tv.dim_tags, tv.shape): + if isinstance(dim_tag, VectorArrayDimTag): + align_size *= axis_len + + base_storage_to_align_bytes.setdefault(tv.base_storage, []).append( + align_size) + for idi in decl_info: cast_decl = POD(self, idi.dtype, "") temp_var_decl = POD(self, idi.dtype, idi.name) @@ -162,6 +173,9 @@ class CTarget(TargetBase): bs_var_decl = ArrayOf(bs_var_decl, max(bs_sizes)) + alignment = max(base_storage_to_align_bytes[bs_name]) + bs_var_decl = AlignedAttribute(alignment, bs_var_decl) + body.append(bs_var_decl) body.extend(temp_decls) diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index ac1c71d46..fb6ba114d 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit ac1c71d46428c14aa1bd1c09d7da19cd0298d5cc +Subproject commit fb6ba114d9d906403d47b0aaf69e2fe4cef382f2 -- GitLab