diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index dd2104d0c5730a155770bb17e6d8a84a9ddb1aee..f377ed1de9944b70a34132c8b3902d910f2567f0 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -233,7 +233,10 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                         (isinstance(ary, (ConstantArg, ArrayArg)) or
                          (isinstance(ary, TemporaryVariable) and ary.base_storage))):
                     # unsubscripted global args are pointers
-                    result = make_var(access_info.array_name)[0]
+                    result = self.make_subscript(
+                            ary,
+                            make_var(access_info.array_name),
+                            (0,))
 
                 else:
                     # unsubscripted temp vars are scalars
@@ -242,8 +245,11 @@ class ExpressionToCExpressionMapper(IdentityMapper):
 
             else:
                 subscript, = access_info.subscripts
-                result = make_var(access_info.array_name)[simplify_using_aff(
-                    self.kernel, self.rec(subscript, 'i'))]
+                result = self.make_subscript(
+                        ary,
+                        make_var(access_info.array_name),
+                        simplify_using_aff(
+                            self.kernel, self.rec(subscript, 'i')))
 
             if access_info.vector_index is not None:
                 return self.codegen_state.ast_builder.add_vector_access(
@@ -275,8 +281,10 @@ class ExpressionToCExpressionMapper(IdentityMapper):
             else:
                 offset = 0
 
-            return var(expr.aggregate.name)[
-                    self.rec(offset + expr.index, 'i')]
+            return self.make_subscript(
+                    arg,
+                    var(expr.aggregate.name),
+                    self.rec(offset + expr.index, 'i'))
 
         elif expr.aggregate.name in self.kernel.temporary_variables:
             raise RuntimeError("linear indexing is not supported on temporaries: %s"
@@ -286,6 +294,14 @@ class ExpressionToCExpressionMapper(IdentityMapper):
             raise RuntimeError(
                     "nothing known about variable '%s'" % expr.aggregate.name)
 
+    def make_subscript(self, array, base_expr, subscript):
+        """Return the expression indexing *base_expr* with *subscript*.
+
+        Hook for subclasses that need to decorate array accesses; *array*
+        is the kernel argument or temporary being accessed.
+        """
+        return base_expr[subscript]
+
     def map_floor_div(self, expr, type_context):
         from loopy.symbolic import get_dependencies
         iname_deps = get_dependencies(expr) & self.kernel.all_inames()
@@ -729,7 +745,7 @@ class CExpressionToCodeMapper(RecursiveMapper):
         if isinstance(expr.function, Variable):
             func = expr.function.name
         else:
-            func = self.rec(expr.function, PREC_CALL)
+            func = self.rec(expr.function, PREC_CALL+1)
 
         return self.parenthesize_if_needed(
                 "%s(%s)" % (
@@ -755,7 +771,7 @@ class CExpressionToCodeMapper(RecursiveMapper):
     def map_subscript(self, expr, enclosing_prec):
         return self.parenthesize_if_needed(
                 "%s[%s]" % (
-                    self.rec(expr.aggregate, PREC_CALL),
+                    self.rec(expr.aggregate, PREC_CALL+1),
                     self.rec(expr.index, PREC_NONE)),
                 enclosing_prec, PREC_CALL)
 
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 432c95ef34cc3d34548effba340386e3e44c9147..aad3fe9941ecd33360b24777643a24cb0472863e 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -647,4 +647,44 @@ class OpenCLCASTBuilder(CASTBuilder):
 
 # }}}
 
+
+# {{{ volatile mem access target
+
+class VolatileMemExpressionToOpenCLCExpressionMapper(
+        ExpressionToOpenCLCExpressionMapper):
+    def make_subscript(self, array, base_expr, subscript):
+        """Index *base_expr* through a ``volatile``-qualified pointer cast."""
+        registry = self.codegen_state.ast_builder.target.get_dtype_registry()
+
+        from loopy.kernel.data import AddressSpace
+        if array.address_space == AddressSpace.GLOBAL:
+            aspace = "__global "
+        elif array.address_space == AddressSpace.LOCAL:
+            aspace = "__local "
+        elif array.address_space == AddressSpace.PRIVATE:
+            aspace = ""
+        else:
+            raise ValueError("unexpected value of address space")
+
+        from pymbolic import var
+        return var(
+                "(%s volatile %s *) "
+                % (
+                    registry.dtype_to_ctype(array.dtype),
+                    aspace,
+                    )
+                )(base_expr)[subscript]
+
+
+class VolatileMemOpenCLCASTBuilder(OpenCLCASTBuilder):
+    def get_expression_to_c_expression_mapper(self, codegen_state):
+        return VolatileMemExpressionToOpenCLCExpressionMapper(codegen_state)
+
+
+class VolatileMemOpenCLTarget(OpenCLTarget):
+    def get_device_ast_builder(self):
+        return VolatileMemOpenCLCASTBuilder(self)
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 34faf0a03d60b5be391c7f49e9baf247093e965a..c5e8d0a7f7a9f70b3afe46e9d04a3bf861066329 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -411,6 +411,10 @@ class PyOpenCLTarget(OpenCLTarget):
         from loopy.target.pyopencl_execution import PyOpenCLKernelExecutor
         return PyOpenCLKernelExecutor(queue.context, kernel)
 
+    def with_device(self, device):
+        """Return a new target of the same type constructed with *device*."""
+        return type(self)(device)
+
 # }}}
 
 
@@ -760,4 +764,19 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder):
 # }}}
 
 
+# {{{ volatile mem access target
+
+class VolatileMemPyOpenCLCASTBuilder(PyOpenCLCASTBuilder):
+    def get_expression_to_c_expression_mapper(self, codegen_state):
+        from loopy.target.opencl import \
+                VolatileMemExpressionToOpenCLCExpressionMapper
+        return VolatileMemExpressionToOpenCLCExpressionMapper(codegen_state)
+
+
+class VolatileMemPyOpenCLTarget(PyOpenCLTarget):
+    def get_device_ast_builder(self):
+        return VolatileMemPyOpenCLCASTBuilder(self)
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py
index 27be619870de6939cf3de06751dea65a3fd558c0..0186a279fed37913e4e29a31f1f5c0933b1b2ea9 100644
--- a/loopy/target/pyopencl_execution.py
+++ b/loopy/target/pyopencl_execution.py
@@ -267,7 +267,8 @@ class PyOpenCLKernelExecutor(KernelExecutorBase):
 
         from loopy.target.pyopencl import PyOpenCLTarget
         if isinstance(kernel.target, PyOpenCLTarget):
-            self.kernel = kernel.copy(target=PyOpenCLTarget(context.devices[0]))
+            self.kernel = kernel.copy(target=(
+                kernel.target.with_device(context.devices[0])))
 
     def get_invoker_uncached(self, kernel, codegen_result):
         generator = PyOpenCLExecutionWrapperGenerator()