From 0a9f6ed2f245aa93dae08042d3fa5353728c7185 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sat, 5 Nov 2011 14:04:30 -0500
Subject: [PATCH] Add support for constant array arguments.

---
 loopy/__init__.py         |  2 +-
 loopy/codegen/__init__.py | 15 ++++++++++-----
 loopy/cse.py              |  4 ++--
 loopy/kernel.py           |  3 ++-
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/loopy/__init__.py b/loopy/__init__.py
index 843356bf3..379f1c3b8 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -20,7 +20,7 @@ class LoopyAdvisory(UserWarning):
 
 # {{{ imported user interface
 
-from loopy.kernel import ScalarArg, ArrayArg, ImageArg
+from loopy.kernel import ScalarArg, ArrayArg, ConstantArrayArg, ImageArg
 
 from loopy.kernel import AutoFitLocalIndexTag, get_dot_dependency_graph
 from loopy.cse import realize_cse
diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index dd4b4dec5..fb3e00bd3 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -183,7 +183,7 @@ def generate_code(kernel):
             POD, Value, ArrayOf, Module, Block,
             Line, Const, LiteralLines, Initializer)
 
-    from cgen.opencl import (CLKernel, CLGlobal, CLRequiredWorkGroupSize,
+    from cgen.opencl import (CLKernel, CLGlobal, CLConstant, CLRequiredWorkGroupSize,
             CLLocal, CLImage, CLConstant)
 
     from loopy.symbolic import LoopyCCodeMapper
@@ -207,11 +207,11 @@ def generate_code(kernel):
     has_double = False
     has_image = False
 
-    from loopy.kernel import ArrayArg, ImageArg
+    from loopy.kernel import ArrayArg, ConstantArrayArg, ImageArg, ScalarArg
 
     args = []
     for arg in kernel.args:
-        if isinstance(arg, ArrayArg):
+        if isinstance(arg, (ConstantArrayArg, ArrayArg)):
             arg_decl = restrict_ptr_if_not_nvidia(
                     POD(arg.dtype, arg.name))
             if arg_decl.name not in kernel.get_written_variables():
@@ -219,7 +219,10 @@ def generate_code(kernel):
                     arg_decl = CLConstant(Const(arg_decl))
                 else:
                     arg_decl = Const(arg_decl)
-            arg_decl = CLGlobal(arg_decl)
+            if isinstance(arg, ConstantArrayArg):
+                arg_decl = CLConstant(arg_decl)
+            else:
+                arg_decl = CLGlobal(arg_decl)
         elif isinstance(arg, ImageArg):
             if arg.name in kernel.get_written_variables():
                 mode = "w"
@@ -229,8 +232,10 @@ def generate_code(kernel):
             arg_decl = CLImage(arg.dimensions, mode, arg.name)
 
             has_image = True
-        else:
+        elif isinstance(arg, ScalarArg):
             arg_decl = Const(POD(arg.dtype, arg.name))
+        else:
+            raise ValueError("argument type not understood: '%s'" % type(arg))
 
         if arg.dtype in [np.float64, np.complex128]:
             has_double = True
diff --git a/loopy/cse.py b/loopy/cse.py
index ab576a701..2fe4340ab 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -342,8 +342,8 @@ def realize_cse(kernel, cse_tag, dtype, independent_inames=[],
 
     if not set(independent_inames) <= kernel.all_inames():
         raise ValueError("In CSE realization for '%s': "
-                "cannot make inames '%s' independent--"
-                "they don't already exist" % (
+                "cannot make iname(s) '%s' independent--"
+                "it/they don't already exist" % (
                     cse_tag,
                     ",".join(
                         set(independent_inames)-kernel.all_inames())))
diff --git a/loopy/kernel.py b/loopy/kernel.py
index 6af2b8726..408fe45fa 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -156,7 +156,8 @@ class ArrayArg:
     def __repr__(self):
         return "<ArrayArg '%s' of type %s>" % (self.name, self.dtype)
 
-
+class ConstantArrayArg(ArrayArg):
+    """Array argument emitted as ``CLConstant`` instead of ``CLGlobal``."""
 
 class ImageArg:
     def __init__(self, name, dtype, dimensions=None, shape=None):
-- 
GitLab