From 030e458f3b7302d1de9f7986a0154c4d14b6519a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 14 Nov 2016 14:19:28 -0600 Subject: [PATCH] Fix generation of temp var shape (especially for temp vars with base shape) Should fix https://github.com/inducer/loopy/issues/50. --- loopy/target/c/__init__.py | 23 ++++++++++++++++++----- loopy/target/c/codegen/expression.py | 5 ++++- loopy/target/ispc.py | 12 ++++++++---- test/test_loopy.py | 13 +++++++++++++ 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 50ae5856b..de0fa01fc 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -1,4 +1,4 @@ -"""OpenCL target independent of PyOpenCL.""" +"""Plain C target and base for other C-family languages.""" from __future__ import division, absolute_import @@ -33,6 +33,7 @@ from cgen import Pointer from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE from loopy.symbolic import IdentityMapper +import pymbolic.primitives as p from pytools import memoize_method @@ -409,7 +410,8 @@ class CASTBuilder(ASTBuilderBase): if tv.scope != temp_var_scope.GLOBAL: decl = self.wrap_temporary_decl( self.get_temporary_decl( - kernel, schedule_index, tv, idi), tv.scope) + codegen_state, schedule_index, tv, idi), + tv.scope) if tv.initializer is not None: decl = Initializer(decl, generate_array_literal( @@ -467,12 +469,21 @@ class CASTBuilder(ASTBuilderBase): idi.dtype.itemsize * product(si for si in idi.shape)) + ecm = self.get_expression_to_code_mapper(codegen_state) + for bs_name, bs_sizes in sorted(six.iteritems(base_storage_sizes)): bs_var_decl = Value("char", bs_name) from pytools import single_valued bs_var_decl = self.wrap_temporary_decl( bs_var_decl, single_valued(base_storage_to_scope[bs_name])) - bs_var_decl = ArrayOf(bs_var_decl, max(bs_sizes)) + + # FIXME: Could try to use isl knowledge to simplify max. 
+ if all(isinstance(bs, int) for bs in bs_sizes): + bs_size_max = max(bs_sizes) + else: + bs_size_max = p.Max(tuple(bs_sizes)) + + bs_var_decl = ArrayOf(bs_var_decl, ecm(bs_size_max)) alignment = max(base_storage_to_align_bytes[bs_name]) bs_var_decl = AlignedAttribute(alignment, bs_var_decl) @@ -509,7 +520,7 @@ class CASTBuilder(ASTBuilderBase): from loopy.target.c.codegen.expression import CExpressionToCodeMapper return CExpressionToCodeMapper() - def get_temporary_decl(self, knl, schedule_index, temp_var, decl_info): + def get_temporary_decl(self, codegen_state, schedule_index, temp_var, decl_info): temp_var_decl = POD(self, decl_info.dtype, decl_info.name) if temp_var.read_only: @@ -518,8 +529,10 @@ class CASTBuilder(ASTBuilderBase): if decl_info.shape: from cgen import ArrayOf + ecm = self.get_expression_to_code_mapper(codegen_state) temp_var_decl = ArrayOf(temp_var_decl, - " * ".join(str(s) for s in decl_info.shape)) + ecm(p.flattened_product(decl_info.shape), + prec=PREC_NONE, type_context="i")) return temp_var_decl diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 51cfc7fe6..ffa2c4e62 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -105,7 +105,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.infer_type(expr), needed_dtype, RecursiveMapper.rec(self, expr, type_context)) - def __call__(self, expr, prec, type_context=None, needed_dtype=None): + def __call__(self, expr, prec=None, type_context=None, needed_dtype=None): + if prec is None: + prec = PREC_NONE + assert prec == PREC_NONE from loopy.target.c import CExpression return CExpression( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 536a186e7..230b284c5 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -32,6 +32,7 @@ from loopy.diagnostic import LoopyError from loopy.symbolic import Literal from pymbolic import var import pymbolic.primitives as p +from 
pymbolic.mapper.stringifier import PREC_NONE from pytools import memoize_method @@ -295,7 +296,7 @@ class ISPCASTBuilder(CASTBuilder): else: raise LoopyError("unknown barrier kind") - def get_temporary_decl(self, knl, sched_index, temp_var, decl_info): + def get_temporary_decl(self, codegen_state, sched_index, temp_var, decl_info): from loopy.target.c import POD # uses the correct complex type temp_var_decl = POD(self, decl_info.dtype, decl_info.name) @@ -306,13 +307,16 @@ class ISPCASTBuilder(CASTBuilder): # FIXME: This is a pretty coarse way of deciding what # private temporaries get duplicated. Refine? (See also # above in expr to code mapper) - _, lsize = knl.get_grid_size_upper_bounds_as_exprs() + _, lsize = codegen_state.kernel.get_grid_size_upper_bounds_as_exprs() shape = lsize + shape if shape: from cgen import ArrayOf - temp_var_decl = ArrayOf(temp_var_decl, - " * ".join(str(s) for s in shape)) + ecm = self.get_expression_to_code_mapper(codegen_state) + temp_var_decl = ArrayOf( + temp_var_decl, + ecm(p.flattened_product(shape), + prec=PREC_NONE, type_context="i")) return temp_var_decl diff --git a/test/test_loopy.py b/test/test_loopy.py index 972946cbe..e0e619a1c 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1587,6 +1587,19 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order): assert np.array_equal(a, a2) +def test_base_storage_decl(): + knl = lp.make_kernel( + "{ [i]: 0<=i 1: exec(sys.argv[1]) -- GitLab