From fc0076d269b87d211bda01348a70f9a7fa4cfa71 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Tue, 9 Aug 2011 14:39:26 +0200 Subject: [PATCH] Allow use of constant memory. --- loopy/__init__.py | 1 - loopy/codegen/__init__.py | 7 +++++-- loopy/kernel.py | 11 ++++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 049327d03..417957584 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -11,7 +11,6 @@ register_mpz_with_pymbolic() -# TODO: Constant memory (plus check for count) # TODO: Reuse of previously split dimensions for prefetch # (Or general merging) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index e8415bdbb..4048ca971 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -145,7 +145,7 @@ def generate_code(kernel): Define, Line, Const, LiteralLines, Initializer) from cgen.opencl import (CLKernel, CLGlobal, CLRequiredWorkGroupSize, - CLLocal, CLImage) + CLLocal, CLImage, CLConstant) from loopy.symbolic import LoopyCCodeMapper my_ccm = LoopyCCodeMapper(kernel) @@ -180,7 +180,10 @@ def generate_code(kernel): arg_decl = restrict_ptr_if_not_nvidia( POD(arg.dtype, arg.name)) if arg_decl.name in kernel.input_vectors(): - arg_decl = Const(arg_decl) + if arg.constant_mem: + arg_decl = CLConstant(Const(arg_decl)) + else: + arg_decl = Const(arg_decl) arg_decl = CLGlobal(arg_decl) elif isinstance(arg, ImageArg): if arg.name in kernel.input_vectors(): diff --git a/loopy/kernel.py b/loopy/kernel.py index f3dff96bd..b8f9d9753 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -14,7 +14,7 @@ import pyopencl as cl class ArrayArg: def __init__(self, name, dtype, strides=None, shape=None, order="C", - offset=0): + offset=0, constant_mem=False): """ All of the following are optional. Specify either strides or shape. @@ -49,6 +49,8 @@ class ArrayArg: self.strides = strides self.offset = offset + self.constant_mem = constant_mem + def __repr__(self): return "<ArrayArg '%s' of type %s>" % (self.name, self.dtype) @@ -493,6 +495,13 @@ class LoopKernel(Record): msg(4, "using more local memory than available--" "possibly OK due to cache nature") + const_arg_count = sum( + 1 for arg in self.args + if isinstance(arg, ArrayArg) and arg.constant_mem) + + if const_arg_count > self.device.max_constant_args: + msg(5, "too many constant arguments") + max_severity = 0 for sev, msg in msgs: max_severity = max(sev, max_severity) -- GitLab