From 6969ca889133b1d00a3a0c7031e0e9e10a0455c2 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 17 Dec 2018 17:22:45 -0600 Subject: [PATCH 1/3] Introduce PyOpenCLTarget.with_device to maintain target type --- loopy/target/pyopencl.py | 3 +++ loopy/target/pyopencl_execution.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 34faf0a03..e8b5e4464 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -411,6 +411,9 @@ class PyOpenCLTarget(OpenCLTarget): from loopy.target.pyopencl_execution import PyOpenCLKernelExecutor return PyOpenCLKernelExecutor(queue.context, kernel) + def with_device(self, device): + return type(self)(device) + # }}} diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 27be61987..0186a279f 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -267,7 +267,8 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): from loopy.target.pyopencl import PyOpenCLTarget if isinstance(kernel.target, PyOpenCLTarget): - self.kernel = kernel.copy(target=PyOpenCLTarget(context.devices[0])) + self.kernel = kernel.copy(target=( + kernel.target.with_device(context.devices[0]))) def get_invoker_uncached(self, kernel, codegen_result): generator = PyOpenCLExecutionWrapperGenerator() -- GitLab From 9482965d06d74c0bd578cd4881e6936e041c54ed Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 17 Dec 2018 17:24:05 -0600 Subject: [PATCH 2/3] Introduce ExpressionToCExpressionMapper.make_subscript to allow overriding subscript code gen --- loopy/target/c/codegen/expression.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index dd2104d0c..f377ed1de 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -233,7 +233,10 @@ class 
ExpressionToCExpressionMapper(IdentityMapper): (isinstance(ary, (ConstantArg, ArrayArg)) or (isinstance(ary, TemporaryVariable) and ary.base_storage))): # unsubscripted global args are pointers - result = make_var(access_info.array_name)[0] + result = self.make_subscript( + ary, + make_var(access_info.array_name), + (0,)) else: # unsubscripted temp vars are scalars @@ -242,8 +245,11 @@ class ExpressionToCExpressionMapper(IdentityMapper): else: subscript, = access_info.subscripts - result = make_var(access_info.array_name)[simplify_using_aff( - self.kernel, self.rec(subscript, 'i'))] + result = self.make_subscript( + ary, + make_var(access_info.array_name), + simplify_using_aff( + self.kernel, self.rec(subscript, 'i'))) if access_info.vector_index is not None: return self.codegen_state.ast_builder.add_vector_access( @@ -275,8 +281,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): else: offset = 0 - return var(expr.aggregate.name)[ - self.rec(offset + expr.index, 'i')] + return self.make_subscript( + arg, + var(expr.aggregate.name), + self.rec(offset + expr.index, 'i')) elif expr.aggregate.name in self.kernel.temporary_variables: raise RuntimeError("linear indexing is not supported on temporaries: %s" @@ -286,6 +294,9 @@ class ExpressionToCExpressionMapper(IdentityMapper): raise RuntimeError( "nothing known about variable '%s'" % expr.aggregate.name) + def make_subscript(self, array, base_expr, subscript): + return base_expr[subscript] + def map_floor_div(self, expr, type_context): from loopy.symbolic import get_dependencies iname_deps = get_dependencies(expr) & self.kernel.all_inames() @@ -729,7 +740,7 @@ class CExpressionToCodeMapper(RecursiveMapper): if isinstance(expr.function, Variable): func = expr.function.name else: - func = self.rec(expr.function, PREC_CALL) + func = self.rec(expr.function, PREC_CALL+1) return self.parenthesize_if_needed( "%s(%s)" % ( @@ -755,7 +766,7 @@ class CExpressionToCodeMapper(RecursiveMapper): def map_subscript(self, expr, 
enclosing_prec): return self.parenthesize_if_needed( "%s[%s]" % ( - self.rec(expr.aggregate, PREC_CALL), + self.rec(expr.aggregate, PREC_CALL+1), self.rec(expr.index, PREC_NONE)), enclosing_prec, PREC_CALL) -- GitLab From 50f1c8b6e53a72f63f74c5388251fd8898af5377 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 17 Dec 2018 17:24:49 -0600 Subject: [PATCH 3/3] Introduce VolatileMemOpenCLTarget, VolatileMemPyOpenCLTarget --- loopy/target/opencl.py | 39 +++++++++++++++++++++++++++++++++++++++ loopy/target/pyopencl.py | 15 +++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 432c95ef3..aad3fe994 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -647,4 +647,43 @@ class OpenCLCASTBuilder(CASTBuilder): # }}} + +# {{{ volatile mem access target + +class VolatileMemExpressionToOpenCLCExpressionMapper( + ExpressionToOpenCLCExpressionMapper): + def make_subscript(self, array, base_expr, subscript): + registry = self.codegen_state.ast_builder.target.get_dtype_registry() + + from loopy.kernel.data import AddressSpace + if array.address_space == AddressSpace.GLOBAL: + aspace = "__global " + elif array.address_space == AddressSpace.LOCAL: + aspace = "__local " + elif array.address_space == AddressSpace.PRIVATE: + aspace = "" + else: + raise ValueError("unexpected value of address space") + + from pymbolic import var + return var( + "(%s volatile %s *) " + % ( + registry.dtype_to_ctype(array.dtype), + aspace, + ) + )(base_expr)[subscript] + + +class VolatileMemOpenCLCASTBuilder(OpenCLCASTBuilder): + def get_expression_to_c_expression_mapper(self, codegen_state): + return VolatileMemExpressionToOpenCLCExpressionMapper(codegen_state) + + +class VolatileMemOpenCLTarget(OpenCLTarget): + def get_device_ast_builder(self): + return VolatileMemOpenCLCASTBuilder(self) + +# }}} + # vim: foldmethod=marker diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index e8b5e4464..c5e8d0a7f 
100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -763,4 +763,19 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): # }}} +# {{{ volatile mem access target + +class VolatileMemPyOpenCLCASTBuilder(PyOpenCLCASTBuilder): + def get_expression_to_c_expression_mapper(self, codegen_state): + from loopy.target.opencl import \ + VolatileMemExpressionToOpenCLCExpressionMapper + return VolatileMemExpressionToOpenCLCExpressionMapper(codegen_state) + + +class VolatileMemPyOpenCLTarget(PyOpenCLTarget): + def get_device_ast_builder(self): + return VolatileMemPyOpenCLCASTBuilder(self) + +# }}} + # vim: foldmethod=marker -- GitLab