Pass the assignee's variable descriptor (lhs_var) through to
generate_atomic_update so that the OpenCL target can cast the pointer used
for the atomic update with the qualifier matching the variable ("__global"
or "__local") rather than always "__global".

diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index f4c48443f9062bf362e9a27681d2967b8f82807d..323951a63a7f9c370bfd0a357ae585a6bd1b336b 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -116,7 +116,8 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
     # }}}
 
     (assignee_var_name, assignee_indices), = insn.assignees_and_indices()
-    lhs_dtype = kernel.get_var_descriptor(assignee_var_name).dtype
+    lhs_var = kernel.get_var_descriptor(assignee_var_name)
+    lhs_dtype = lhs_var.dtype
 
     if insn.atomicity is not None:
         lhs_atomicity = [
@@ -147,7 +148,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
     elif isinstance(lhs_atomicity, AtomicUpdate):
         codegen_state.seen_atomic_dtypes.add(lhs_dtype)
         result = kernel.target.generate_atomic_update(
-                kernel, codegen_state, lhs_atomicity,
+                kernel, codegen_state, lhs_atomicity, lhs_var,
                 insn.assignee, insn.expression,
                 lhs_dtype, rhs_type_context)
 
diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index c8c2324e828a6c69696c4e6a828eb79c29d230e8..42a9e5a3dc4c88cceadb3399b5399d609b877cfb 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -133,9 +133,9 @@ class TargetBase(object):
     def get_image_arg_decl(self, name, shape, num_target_axes, dtype, is_written):
         raise NotImplementedError()
 
-    def generate_atomic_update(self, kernel, codegen_state, lhs_atomicity,
-            lhs_expr, rhs_expr, lhs_dtype):
-        raise NotImplementedError("atomic update")
+    def generate_atomic_update(self, kernel, codegen_state, lhs_atomicity, lhs_var,
+            lhs_expr, rhs_expr, lhs_dtype, rhs_type_context=None):
+        raise NotImplementedError("atomic update in target %s" % type(self).__name__)
 
     # }}}
 
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 43c4ea7dc30f139261a3c8ea46e32860c265412f..70bce2331098860ada34c3fc4d865f3ba9207bc1 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -411,7 +411,7 @@ class OpenCLTarget(CTarget):
 
     # {{{ code generation for atomic update
 
-    def generate_atomic_update(self, kernel, codegen_state, lhs_atomicity,
+    def generate_atomic_update(self, kernel, codegen_state, lhs_atomicity, lhs_var,
             lhs_expr, rhs_expr, lhs_dtype, rhs_type_context):
         from pymbolic.mapper.stringifier import PREC_NONE
 
@@ -425,7 +425,7 @@ class OpenCLTarget(CTarget):
             old_val_var = codegen_state.var_name_generator("loopy_old_val")
             new_val_var = codegen_state.var_name_generator("loopy_new_val")
 
-            from loopy.kernel.data import TemporaryVariable
+            from loopy.kernel.data import TemporaryVariable, temp_var_scope
             ecm = codegen_state.expression_to_code_mapper.with_assignments(
                     {
                         old_val_var: TemporaryVariable(old_val_var, lhs_dtype),
@@ -463,9 +463,26 @@ class OpenCLTarget(CTarget):
             else:
                 assert False
 
+            from loopy.kernel.data import TemporaryVariable, GlobalArg
+            # The pointer cast below must carry the qualifier of the assignee.
+            if isinstance(lhs_var, GlobalArg):
+                var_kind = "__global"
+            elif (
+                    isinstance(lhs_var, TemporaryVariable)
+                    and lhs_var.scope == temp_var_scope.LOCAL):
+                var_kind = "__local"
+            elif (
+                    isinstance(lhs_var, TemporaryVariable)
+                    and lhs_var.scope == temp_var_scope.GLOBAL):
+                var_kind = "__global"
+            else:
+                raise LoopyError("unexpected kind of variable '%s' in "
+                        "atomic operation: %s"
+                        % (lhs_var.name, type(lhs_var).__name__))
+
             old_val = "*(%s *) &" % ctype + old_val
             new_val = "*(%s *) &" % ctype + new_val
-            cast_str = "(__global %s *) " % ctype
+            cast_str = "(%s %s *) " % (var_kind, ctype)
 
             return Block([
                 POD(self, NumpyType(lhs_dtype.dtype), old_val_var),