Skip to content
Snippets Groups Projects
Commit d21e4963 authored by Andreas Klöckner
Browse files

Fix alignment of aliased temporary storage

parent 238c80e7
No related branches found
No related tags found
No related merge requests found
......@@ -91,8 +91,9 @@ class CTarget(TargetBase):
base_storage_sizes = {}
base_storage_to_is_local = {}
base_storage_to_align_bytes = {}
from cgen import ArrayOf, Pointer, Initializer
from cgen import ArrayOf, Pointer, Initializer, AlignedAttribute
from loopy.codegen import POD # uses the correct complex type
from cgen.opencl import CLLocal
......@@ -124,6 +125,16 @@ class CTarget(TargetBase):
base_storage_to_is_local.setdefault(tv.base_storage, []).append(
tv.is_local)
align_size = tv.dtype.itemsize
from loopy.kernel.array import VectorArrayDimTag
for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
if isinstance(dim_tag, VectorArrayDimTag):
align_size *= axis_len
base_storage_to_align_bytes.setdefault(tv.base_storage, []).append(
align_size)
for idi in decl_info:
cast_decl = POD(self, idi.dtype, "")
temp_var_decl = POD(self, idi.dtype, idi.name)
......@@ -162,6 +173,9 @@ class CTarget(TargetBase):
bs_var_decl = ArrayOf(bs_var_decl, max(bs_sizes))
alignment = max(base_storage_to_align_bytes[bs_name])
bs_var_decl = AlignedAttribute(alignment, bs_var_decl)
body.append(bs_var_decl)
body.extend(temp_decls)
......
Subproject commit ac1c71d46428c14aa1bd1c09d7da19cd0298d5cc
Subproject commit fb6ba114d9d906403d47b0aaf69e2fe4cef382f2
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.