diff --git a/loopy/__init__.py b/loopy/__init__.py index 843356bf363f8c918a840daa879676cbe8a8f0ee..379f1c3b84f07a33ff5c70a5cd8ddc585a805cdd 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -20,7 +20,7 @@ class LoopyAdvisory(UserWarning): # {{{ imported user interface -from loopy.kernel import ScalarArg, ArrayArg, ImageArg +from loopy.kernel import ScalarArg, ArrayArg, ConstantArrayArg, ImageArg from loopy.kernel import AutoFitLocalIndexTag, get_dot_dependency_graph from loopy.cse import realize_cse diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index dd4b4dec5cb6c030864fd9ebed649caf8f075ff7..fb3e00bd3618d6a2c2c651945c6bba04dd80bc9c 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -183,7 +183,7 @@ def generate_code(kernel): POD, Value, ArrayOf, Module, Block, Line, Const, LiteralLines, Initializer) - from cgen.opencl import (CLKernel, CLGlobal, CLRequiredWorkGroupSize, + from cgen.opencl import (CLKernel, CLGlobal, CLConstant, CLRequiredWorkGroupSize, CLLocal, CLImage, CLConstant) from loopy.symbolic import LoopyCCodeMapper @@ -207,11 +207,11 @@ def generate_code(kernel): has_double = False has_image = False - from loopy.kernel import ArrayArg, ImageArg + from loopy.kernel import ArrayArg, ConstantArrayArg, ImageArg, ScalarArg args = [] for arg in kernel.args: - if isinstance(arg, ArrayArg): + if isinstance(arg, (ConstantArrayArg, ArrayArg)): arg_decl = restrict_ptr_if_not_nvidia( POD(arg.dtype, arg.name)) if arg_decl.name not in kernel.get_written_variables(): @@ -219,7 +219,10 @@ def generate_code(kernel): arg_decl = CLConstant(Const(arg_decl)) else: arg_decl = Const(arg_decl) - arg_decl = CLGlobal(arg_decl) + if isinstance(arg, ConstantArrayArg): + arg_decl = CLConstant(arg_decl) + else: + arg_decl = CLGlobal(arg_decl) elif isinstance(arg, ImageArg): if arg.name in kernel.get_written_variables(): mode = "w" @@ -229,8 +232,10 @@ def generate_code(kernel): arg_decl = CLImage(arg.dimensions, mode, arg.name) has_image = True - else: + elif isinstance(arg, ScalarArg): arg_decl = Const(POD(arg.dtype, arg.name)) + else: + raise ValueError("argument type not understood: '%s'" % type(arg)) if arg.dtype in [np.float64, np.complex128]: has_double = True diff --git a/loopy/cse.py b/loopy/cse.py index ab576a701a9f08c68b3a3211b09df2259f36baa9..2fe4340ab993c648141fdf336e2bc3653f3c65c1 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -342,8 +342,8 @@ def realize_cse(kernel, cse_tag, dtype, independent_inames=[], if not set(independent_inames) <= kernel.all_inames(): raise ValueError("In CSE realization for '%s': " - "cannot make inames '%s' independent--" - "they don't already exist" % ( + "cannot make iname(s) '%s' independent--" + "it/they don't already exist" % ( cse_tag, ",".join( set(independent_inames)-kernel.all_inames()))) diff --git a/loopy/kernel.py b/loopy/kernel.py index 6af2b8726e453569784b0419b9e4c2dd2d8de96b..408fe45fa1141b9b9de92df5716bf88dad3ae52a 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -156,7 +156,8 @@ class ArrayArg: def __repr__(self): return "<ArrayArg '%s' of type %s>" % (self.name, self.dtype) - +class ConstantArrayArg(ArrayArg): + pass class ImageArg: def __init__(self, name, dtype, dimensions=None, shape=None):