From fc0076d269b87d211bda01348a70f9a7fa4cfa71 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 9 Aug 2011 14:39:26 +0200
Subject: [PATCH] Allow use of constant memory.

---
 loopy/__init__.py         |  1 -
 loopy/codegen/__init__.py |  7 +++++--
 loopy/kernel.py           | 11 ++++++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/loopy/__init__.py b/loopy/__init__.py
index 049327d03..417957584 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -11,7 +11,6 @@ register_mpz_with_pymbolic()
 
 
 
-# TODO: Constant memory (plus check for count)
 # TODO: Reuse of previously split dimensions for prefetch
 #   (Or general merging)
 
diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index e8415bdbb..4048ca971 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -145,7 +145,7 @@ def generate_code(kernel):
             Define, Line, Const, LiteralLines, Initializer)
 
     from cgen.opencl import (CLKernel, CLGlobal, CLRequiredWorkGroupSize,
-            CLLocal, CLImage)
+            CLLocal, CLImage, CLConstant)
 
     from loopy.symbolic import LoopyCCodeMapper
     my_ccm = LoopyCCodeMapper(kernel)
@@ -180,7 +180,10 @@ def generate_code(kernel):
             arg_decl = restrict_ptr_if_not_nvidia(
                     POD(arg.dtype, arg.name))
             if arg_decl.name in kernel.input_vectors():
                 arg_decl = Const(arg_decl)
-            arg_decl = CLGlobal(arg_decl)
+            if arg.constant_mem and arg.name in kernel.input_vectors():
+                arg_decl = CLConstant(arg_decl)  # not also __global
+            else:
+                arg_decl = CLGlobal(arg_decl)
         elif isinstance(arg, ImageArg):
             if arg.name in kernel.input_vectors():
diff --git a/loopy/kernel.py b/loopy/kernel.py
index f3dff96bd..b8f9d9753 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -14,7 +14,7 @@ import pyopencl as cl
 
 class ArrayArg:
     def __init__(self, name, dtype, strides=None, shape=None, order="C",
-            offset=0):
+            offset=0, constant_mem=False):
         """
         All of the following are optional. Specify either strides or shape.
 
@@ -49,6 +49,8 @@ class ArrayArg:
         self.strides = strides
         self.offset = offset
 
+        self.constant_mem = constant_mem
+
     def __repr__(self):
         return "<ArrayArg '%s' of type %s>" % (self.name, self.dtype)
 
@@ -493,6 +495,13 @@ class LoopKernel(Record):
                 msg(4, "using more local memory than available--"
                         "possibly OK due to cache nature")
 
+        const_arg_count = sum(
+                1 for arg in self.args
+                if isinstance(arg, ArrayArg) and arg.constant_mem)
+
+        if const_arg_count > self.device.max_constant_args:
+            msg(5, "too many constant arguments")
+
         max_severity = 0
         for sev, msg in msgs:
             max_severity = max(sev, max_severity)
-- 
GitLab