diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 33d40385b529f72e54da65238304e87bdb2cddab..088bedc9eeb77723483149dcc445d8aa84ddbbb7 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -262,6 +262,9 @@ These are usually key-value pairs. The following attributes are recognized: given instruction groups. See :class:`InstructionBase.conflicts_with_groups`. +* ``atomic`` The update embodied by the assignment is carried out + atomically. See :attr:`Assignment.atomicity` for precise semantics. + .. _expression-syntax: Expressions @@ -286,6 +289,19 @@ C Block Instructions .. autoclass:: CInstruction +Atomic Operations +^^^^^^^^^^^^^^^^^ + +.. autoclass:: memory_ordering + +.. autoclass:: memory_scope + +.. autoclass:: VarAtomicity + +.. autoclass:: AtomicInit + +.. autoclass:: AtomicUpdate + .. }}} Data: Arguments and Temporaries diff --git a/loopy/__init__.py b/loopy/__init__.py index 5e3ad508596a20cce8a084e4d70604cdafa47c79..14bce615a29ef20ef3b5d18b2083cf0cdd98e2f4 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -40,6 +40,7 @@ from loopy.kernel.data import ( auto, KernelArgument, ValueArg, GlobalArg, ConstantArg, ImageArg, + memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, InstructionBase, Assignment, ExpressionInstruction, CInstruction, TemporaryVariable, SubstitutionRule) @@ -125,6 +126,8 @@ __all__ = [ "LoopKernel", "kernel_state", "KernelArgument", + "memory_ordering", "memory_scope", "VarAtomicity", + "AtomicInit", "AtomicUpdate", "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", "TemporaryVariable", "SubstitutionRule", diff --git a/loopy/check.py b/loopy/check.py index 5a1c9e83b52e08f0874f61b500d853b925201731..5ae22cac5e5af8b727ae0ba577fc05a8e571b8a7 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -373,6 +373,37 @@ def pre_schedule_checks(kernel): # {{{ pre-code-generation checks +def check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel): + from loopy.kernel.data import ArrayBase, Assignment + from loopy.types import AtomicType + atomicity_candidates = ( + set(v.name for v in six.itervalues(kernel.temporary_variables) + if isinstance(v.dtype, AtomicType)) + | + set(v.name for v in kernel.args + if isinstance(v, ArrayBase) + and isinstance(v.dtype, AtomicType))) + + for insn in kernel.instructions: + if not isinstance(insn, Assignment): + continue + + atomic_accesses = set(a.var_name for a in insn.atomicity) + if not atomic_accesses <= atomicity_candidates: + raise LoopyError("atomic access in instruction '%s' to " + "non-atomic variable(s) '%s'" + % (insn.id, + ",".join(atomic_accesses - atomicity_candidates))) + + accessed_atomic_vars = insn.dependency_names() & atomicity_candidates + if not accessed_atomic_vars <= atomic_accesses: + raise LoopyError("atomic variable(s) '%s' in instruction '%s' " + "used in non-atomic access" + % ( + ",".join(accessed_atomic_vars - atomic_accesses), + insn.id)) + + def check_that_shapes_and_strides_are_arguments(kernel): from loopy.kernel.data import ValueArg from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag @@ -383,7 +414,7 @@ def check_that_shapes_and_strides_are_arguments(kernel): arg.name for arg in kernel.args if isinstance(arg, ValueArg) - and arg.dtype.kind == "i") + and arg.dtype.is_integral()) for arg in kernel.args: if isinstance(arg, ArrayBase): @@ -417,6 +448,7 @@ def pre_codegen_checks(kernel): try: logger.info("pre-codegen check %s: start" % kernel.name) + check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel) kernel.target.pre_codegen_check(kernel) check_that_shapes_and_strides_are_arguments(kernel) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 91af52cde0000ce811172be53bf21e13ce5fde97..058af59bcc59593189948a0fbd15c7d09349f05c 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -425,11 +425,12 @@ class ImplementedDataInfo(Record): offset_for_name=None, stride_for_name_and_axis=None, allows_offset=None): - from loopy.tools import PicklableDtype + from loopy.types import LoopyType + assert isinstance(dtype, LoopyType) Record.__init__(self, name=name, - picklable_dtype=PicklableDtype(dtype, target=target), + dtype=dtype, cgen_declarator=cgen_declarator, arg_class=arg_class, base_name=base_name, @@ -441,14 +442,6 @@ class ImplementedDataInfo(Record): stride_for_name_and_axis=stride_for_name_and_axis, allows_offset=allows_offset) - @property - def dtype(self): - from loopy.tools import PicklableDtype - if isinstance(self.picklable_dtype, PicklableDtype): - return self.picklable_dtype.dtype - else: - return self.picklable_dtype - # }}} @@ -528,7 +521,7 @@ def generate_code(kernel, device=None): allow_complex = False for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): - if var.dtype.kind == "c": + if var.dtype.involves_complex(): allow_complex = True # }}} @@ -619,7 +612,7 @@ def generate_body(kernel): allow_complex = False for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): - if var.dtype.kind == "c": + if var.dtype.involves_complex(): allow_complex = True seen_dtypes = set() diff --git a/loopy/compiled.py b/loopy/compiled.py index 32536021dcfb3a1460f23af7e066aff9bf772f46..2125fd66347c99adb4014a60c34a6ce44bc05e95 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -1,8 +1,4 @@ -from __future__ import division, with_statement -from __future__ import absolute_import -import six -from six.moves import range -from six.moves import zip +from __future__ import division, with_statement, absolute_import __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -26,6 +22,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import six +from six.moves import range, zip import sys import numpy as np @@ -34,6 +32,7 @@ from loopy.diagnostic import ParameterFinderWarning from pytools.py_codegen import ( Indentation, PythonFunctionGenerator) from loopy.diagnostic import LoopyError +from loopy.types import NumpyType import logging logger = logging.getLogger(__name__) @@ -162,7 +161,7 @@ def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options if len(deps) == 1: integer_arg_var, = deps - if kernel.arg_dict[integer_arg_var.name].dtype.kind == "i": + if kernel.arg_dict[integer_arg_var.name].dtype.is_integral(): from pymbolic.algorithm import solve_affine_equations_for try: # friggin' overkill :) @@ -279,7 +278,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, option if not options.skip_arg_checks: gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" % (arg.name, impl_array_name, stride_impl_axis, - base_arg.dtype.itemsize)) + base_arg.dtype.dtype.itemsize)) gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' is " "not divisible by its dtype itemsize\"" @@ -360,16 +359,20 @@ def generate_value_arg_setup(gen, kernel, cl_kernel, impl_arg_info, options): "be supplied") """.format(name=arg.name)) - if sys.version_info < (2, 7) and arg.dtype.kind == "i": + if sys.version_info < (2, 7) and arg.dtype.is_integral(): gen("# cast to long to avoid trouble with struct packing") gen("%s = long(%s)" % (arg.name, arg.name)) gen("") - if arg.dtype.char == "V": + if arg.dtype.is_composite(): gen("cl_kernel.set_arg(%d, %s)" % (cl_arg_idx, arg.name)) cl_arg_idx += 1 - elif arg.dtype.kind == "c": + elif arg.dtype.is_complex(): + assert isinstance(arg.dtype, NumpyType) + + dtype = arg.dtype + if warn_about_arg_count_bug: from warnings import warn warn("{knl_name}: arguments include complex numbers, and " @@ -378,15 +381,15 @@ def generate_value_arg_setup(gen, kernel, cl_kernel, impl_arg_info, options): "disabled".format( knl_name=kernel.name)) - if arg.dtype == np.complex64: + if dtype.numpy_dtype == np.complex64: arg_char = "f" - elif arg.dtype == np.complex128: + elif dtype.numpy_dtype == np.complex128: arg_char = "d" else: - raise TypeError("unexpected complex type: %s" % arg.dtype) + raise TypeError("unexpected complex type: %s" % dtype) if (work_around_arg_count_bug - and arg.dtype == np.complex128 + and dtype.numpy_dtype == np.complex128 and fp_arg_count + 2 <= 8): gen( "buf = _lpy_pack('{arg_char}', {arg_var}.real)" @@ -415,15 +418,19 @@ def generate_value_arg_setup(gen, kernel, cl_kernel, impl_arg_info, options): fp_arg_count += 2 - else: - if arg.dtype.kind == "f": + elif isinstance(arg.dtype, NumpyType): + if arg.dtype.dtype.kind == "f": fp_arg_count += 1 gen("cl_kernel.set_arg(%d, _lpy_pack('%s', %s))" - % (cl_arg_idx, arg.dtype.char, arg.name)) + % (cl_arg_idx, arg.dtype.dtype.char, arg.name)) cl_arg_idx += 1 + else: + raise LoopyError("do not know how to pass argument of type '%s'" + % arg.dtype) + gen("") gen("# }}}") @@ -510,6 +517,10 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, options, if is_written and arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ and arg.shape is not None: + if not isinstance(arg.dtype, NumpyType): + raise LoopyError("do not know how to pass arg of type '%s'" + % arg.dtype) + possibly_made_by_loopy = True gen("_lpy_made_by_loopy = False") gen("") @@ -520,7 +531,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, options, for i in range(num_axes): gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) - itemsize = kernel_arg.dtype.itemsize + itemsize = kernel_arg.dtype.numpy_dtype.itemsize for i in range(num_axes): gen("_lpy_strides_%d = %s" % (i, strify( itemsize*arg.unvec_strides[i]))) @@ -550,7 +561,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, options, name=arg.name, shape=strify(sym_shape), strides=strify(sym_strides), - dtype=python_dtype_str(arg.dtype))) + dtype=python_dtype_str(kernel_arg.dtype.numpy_dtype))) if not options.skip_arg_checks: for i in range(num_axes): @@ -575,7 +586,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, options, with Indentation(gen): gen("if %s.dtype != %s:" - % (arg.name, python_dtype_str(kernel_arg.dtype))) + % (arg.name, python_dtype_str(kernel_arg.dtype.numpy_dtype))) with Indentation(gen): gen("raise TypeError(\"dtype mismatch on argument '%s' " "(got: %%s, expected: %s)\" %% %s.dtype)" @@ -630,7 +641,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, options, # }}} if arg.unvec_strides and kernel_arg.dim_tags: - itemsize = kernel_arg.dtype.itemsize + itemsize = kernel_arg.dtype.numpy_dtype.itemsize sym_strides = tuple( itemsize*s_i for s_i in arg.unvec_strides) gen("if %s.strides != %s:" diff --git a/loopy/expression.py b/loopy/expression.py index 62c2278be5b863105e45a8c0c6d517cdf2b92fd2..a363013ea0549827438124150a1e4c0c2af94c64 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -28,6 +28,7 @@ import numpy as np from pymbolic.mapper import CombineMapper, RecursiveMapper from loopy.tools import is_integer +from loopy.types import NumpyType from loopy.codegen import Unvectorizable from loopy.diagnostic import ( LoopyError, @@ -41,13 +42,13 @@ from loopy.diagnostic import ( # or None for 'no known context'. def dtype_to_type_context(target, dtype): - dtype = np.dtype(dtype) + from loopy.types import NumpyType - if dtype.kind == 'i': + if dtype.is_integral(): return 'i' - if dtype in [np.float64, np.complex128]: + if isinstance(dtype, NumpyType) and dtype.dtype in [np.float64, np.complex128]: return 'd' - if dtype in [np.float32, np.complex64]: + if isinstance(dtype, NumpyType) and dtype.dtype in [np.float32, np.complex64]: return 'f' if target.is_vector_dtype(dtype): return dtype_to_type_context(target, dtype.fields["x"][0]) @@ -76,8 +77,21 @@ class TypeInferenceMapper(CombineMapper): @staticmethod def combine(dtypes): + # dtypes may just be a generator expr dtypes = list(dtypes) + from loopy.types import NumpyType + if not all(isinstance(dtype, NumpyType) for dtype in dtypes): + from pytools import is_single_valued, single_valued + if not is_single_valued(dtypes): + raise TypeInferenceFailure( + "Nothing known about operations between '%s'" + % ", ".join(str(dt) for dt in dtypes)) + + return single_valued(dtypes) + + dtypes = [dtype.dtype for dtype in dtypes] + result = dtypes.pop() while dtypes: other = dtypes.pop() @@ -107,7 +121,7 @@ class TypeInferenceMapper(CombineMapper): "nothing known about result of operation on " "'%s' and '%s'" % (result, other)) - return result + return NumpyType(result) def map_sum(self, expr): dtypes = [] @@ -120,7 +134,7 @@ class TypeInferenceMapper(CombineMapper): dtypes.append(dtype) from pytools import all - if all(dtype.kind == "i" for dtype in dtypes): + if all(dtype.is_integral() for dtype in dtypes): dtypes.extend(small_integer_dtypes) return self.combine(dtypes) @@ -131,9 +145,9 @@ class TypeInferenceMapper(CombineMapper): n_dtype = self.rec(expr.numerator) d_dtype = self.rec(expr.denominator) - if n_dtype.kind in "iu" and d_dtype.kind in "iu": + if n_dtype.is_integral() and d_dtype.is_integral(): # both integers - return np.dtype(np.float64) + return NumpyType(np.dtype(np.float64)) else: return self.combine([n_dtype, d_dtype]) @@ -143,20 +157,20 @@ class TypeInferenceMapper(CombineMapper): for tp in [np.int32, np.int64]: iinfo = np.iinfo(tp) if iinfo.min <= expr <= iinfo.max: - return np.dtype(tp) + return NumpyType(np.dtype(tp)) else: raise TypeInferenceFailure("integer constant '%s' too large" % expr) dt = np.asarray(expr).dtype if hasattr(expr, "dtype"): - return expr.dtype + return NumpyType(expr.dtype) elif isinstance(expr, np.number): # Numpy types are sized - return np.dtype(type(expr)) + return NumpyType(np.dtype(type(expr))) elif dt.kind == "f": # deduce the smaller type by default - return np.dtype(np.float32) + return NumpyType(np.dtype(np.float32)) elif dt.kind == "c": if np.complex64(expr) == np.complex128(expr): # (COMPLEX_GUESS_LOGIC) @@ -245,13 +259,13 @@ class TypeInferenceMapper(CombineMapper): def map_lookup(self, expr): agg_result = self.rec(expr.aggregate) - dtype, offset = agg_result.fields[expr.name] - return dtype + dtype, offset = agg_result.numpy_dtype.fields[expr.name] + return NumpyType(dtype) def map_comparison(self, expr): # "bool" is unusable because OpenCL's bool has indeterminate memory # format. - return np.dtype(np.int32) + return NumpyType(np.dtype(np.int32)) map_logical_not = map_comparison map_logical_and = map_comparison diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index fb081a959523114ee5fc823eec59420f4adf3457..22a45fac996f6eaf1737fb28ab51ab9ac4f9ead7 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -254,10 +254,11 @@ class LoopKernel(RecordWithoutPickling): # }}} - index_dtype = np.dtype(index_dtype) - if index_dtype.kind != 'i': + from loopy.types import to_loopy_type + index_dtype = to_loopy_type(index_dtype).with_target(target) + if not index_dtype.is_integral(): raise TypeError("index_dtype must be an integer") - if np.iinfo(index_dtype).min >= 0: + if np.iinfo(index_dtype.numpy_dtype).min >= 0: raise TypeError("index_dtype must be signed") if get_grid_sizes is not None: @@ -1043,6 +1044,9 @@ class LoopKernel(RecordWithoutPickling): options.append("priority=%d" % insn.priority) if insn.tags: options.append("tags=%s" % ":".join(insn.tags)) + if isinstance(insn, lp.Assignment) and insn.atomicity: + options.append("atomic=%s" % ":".join( + str(a) for a in insn.atomicity)) if insn.groups: options.append("groups=%s" % ":".join(insn.groups)) if insn.conflicts_with_groups: diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 6deb2a348b606d7d717b290e900d7bda6169fb4e..1ad83c0a718b60194cb5789ced7d0a9c7674de75 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -551,7 +551,8 @@ class ArrayBase(Record): allowed_extra_kwargs = [] def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, - dim_names=None, strides=None, order=None, **kwargs): + dim_names=None, strides=None, order=None, for_atomic=False, + **kwargs): """ All of the following are optional. Specify either strides or shape. @@ -611,6 +612,9 @@ class ArrayBase(Record): :arg order: "F" or "C" for C (row major) or Fortran (column major). Defaults to the *default_order* argument passed to :func:`loopy.make_kernel`. + :arg for_atomic: + Whether the array is declared for atomic access, and, if necessary, + using atomic-capable data types. :arg offset: Offset from the beginning of the buffer to the point from which the strides are counted. May be one of @@ -629,22 +633,9 @@ class ArrayBase(Record): import loopy as lp - if dtype is not None and dtype is not lp.auto: - from loopy.tools import PicklableDtype - if not isinstance(dtype, PicklableDtype): - picklable_dtype = PicklableDtype(dtype) - else: - picklable_dtype = dtype - - if picklable_dtype.dtype == object: - raise TypeError("loopy does not directly support object arrays " - "(object dtype encountered on array '%s') " - "-- you may want to tag the relevant array axis as 'sep'" - % name) - else: - picklable_dtype = dtype - - del dtype + from loopy.types import to_loopy_type + dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True, + for_atomic=for_atomic) strides_known = strides is not None and strides is not lp.auto shape_known = shape is not None and shape is not lp.auto @@ -766,7 +757,7 @@ class ArrayBase(Record): Record.__init__(self, name=name, - picklable_dtype=picklable_dtype, + dtype=dtype, shape=shape, dim_tags=dim_tags, offset=offset, @@ -781,7 +772,7 @@ class ArrayBase(Record): return ( type(self) == type(other) and self.name == other.name - and self.picklable_dtype == other.picklable_dtype + and self.dtype == other.dtype and istoee(self.shape, other.shape) and self.dim_tags == other.dim_tags and isee(self.offset, other.offset) @@ -792,23 +783,6 @@ class ArrayBase(Record): def __ne__(self, other): return not self.__eq__(other) - @property - def dtype(self): - from loopy.tools import PicklableDtype - if isinstance(self.picklable_dtype, PicklableDtype): - return self.picklable_dtype.dtype - else: - return self.picklable_dtype - - def get_copy_kwargs(self, **kwargs): - result = Record.get_copy_kwargs(self, **kwargs) - if "dtype" not in result: - result["dtype"] = self.dtype - - del result["picklable_dtype"] - - return result - def stringify(self, include_typename): import loopy as lp diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 830fe0efadb0425673a9c6ac4f62050d384dcd3f..aedc1edc650d570cdeec1ecca8dc08989678df65 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -30,6 +30,7 @@ from loopy.tools import intern_frozenset_of_ids from loopy.symbolic import IdentityMapper, WalkMapper from loopy.kernel.data import ( InstructionBase, Assignment, SubstitutionRule) +from loopy.diagnostic import LoopyError import islpy as isl from islpy import dim_type @@ -162,6 +163,8 @@ def parse_insn(insn): and *inames_to_dup* is None or a list of tuples `(old, new)`. """ + import loopy as lp + insn_match = INSN_RE.match(insn) subst_match = SUBST_RE.match(insn) if insn_match is not None and subst_match is not None: @@ -189,6 +192,18 @@ def parse_insn(insn): "the following error occurred:" % groups["rhs"]) raise + from pymbolic.primitives import Variable, Subscript, Call + if isinstance(lhs, Variable): + assignee_name = lhs.name + elif isinstance(lhs, Subscript): + assignee_name = lhs.aggregate.name + elif isinstance(lhs, Call): + assignee_name = None + assert subst_match is not None + else: + raise LoopyError("left hand side of assignment '%s' must " + "be variable or subscript" % lhs) + if insn_match is not None: depends_on = None depends_on_is_final = False @@ -201,6 +216,7 @@ def parse_insn(insn): forced_iname_deps = frozenset() predicates = frozenset() tags = () + atomicity = () if groups["options"] is not None: for option in groups["options"].split(","): @@ -216,13 +232,13 @@ def parse_insn(insn): opt_key = option[:equal_idx].strip() opt_value = option[equal_idx+1:].strip() - if opt_key == "id": + if opt_key == "id" and opt_value is not None: insn_id = intern(opt_value) - elif opt_key == "id_prefix": + elif opt_key == "id_prefix" and opt_value is not None: insn_id = UniqueName(opt_value) - elif opt_key == "priority": + elif opt_key == "priority" and opt_value is not None: priority = int(opt_value) - elif opt_key == "dup": + elif opt_key == "dup" and opt_value is not None: for value in opt_value.split(":"): arrow_idx = value.find("->") if arrow_idx >= 0: @@ -231,7 +247,7 @@ def parse_insn(insn): else: inames_to_dup.append((value, None)) - elif opt_key == "dep": + elif opt_key == "dep" and opt_value is not None: if opt_value.startswith("*"): depends_on_is_final = True opt_value = (opt_value[1:]).strip() @@ -240,17 +256,17 @@ def parse_insn(insn): intern(dep.strip()) for dep in opt_value.split(":") if dep.strip()) - elif opt_key == "groups": + elif opt_key == "groups" and opt_value is not None: insn_groups = frozenset( intern(grp.strip()) for grp in opt_value.split(":") if grp.strip()) - elif opt_key == "conflicts": + elif opt_key == "conflicts" and opt_value is not None: conflicts_with_groups = frozenset( intern(grp.strip()) for grp in opt_value.split(":") if grp.strip()) - elif opt_key == "inames": + elif opt_key == "inames" and opt_value is not None: if opt_value.startswith("+"): forced_iname_deps_is_final = False opt_value = (opt_value[1:]).strip() @@ -259,16 +275,32 @@ def parse_insn(insn): forced_iname_deps = intern_frozenset_of_ids(opt_value.split(":")) - elif opt_key == "if": + elif opt_key == "if" and opt_value is not None: predicates = intern_frozenset_of_ids(opt_value.split(":")) - elif opt_key == "tags": + elif opt_key == "tags" and opt_value is not None: tags = tuple( tag.strip() for tag in opt_value.split(":") if tag.strip()) + elif opt_key == "atomic": + if opt_value is None: + atomicity = atomicity + ( + lp.AtomicUpdate(assignee_name),) + else: + for v in opt_value.split(":"): + if v == "init": + atomicity = atomicity + ( + lp.AtomicInit(assignee_name),) + else: + raise LoopyError("atomicity directive not " + "understood: %s" + % v) + else: - raise ValueError("unrecognized instruction option '%s'" + raise ValueError( + "unrecognized instruction option '%s' " + "(maybe a missing/extraneous =value?)" % opt_key) if groups["temp_var_type"] is not None: @@ -280,11 +312,6 @@ def parse_insn(insn): else: temp_var_type = None - from pymbolic.primitives import Variable, Subscript - if not isinstance(lhs, (Variable, Subscript)): - raise RuntimeError("left hand side of assignment '%s' must " - "be variable or subscript" % lhs) - return Assignment( id=( intern(insn_id) @@ -298,6 +325,7 @@ def parse_insn(insn): forced_iname_deps=forced_iname_deps, assignee=lhs, expression=rhs, temp_var_type=temp_var_type, + atomicity=atomicity, priority=priority, predicates=predicates, tags=tags), inames_to_dup diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 5b0cf57e50298febe0580da5fb0efe5756557527..f89c2b631b5928f6c6fc2ace61b8c3684d1e3b8b 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -26,7 +26,7 @@ THE SOFTWARE. from six.moves import intern -import numpy as np +import numpy as np # noqa from pytools import Record, memoize_method from loopy.kernel.array import ArrayBase from loopy.diagnostic import LoopyError @@ -191,32 +191,12 @@ class KernelArgument(Record): kwargs["name"] = intern(kwargs.pop("name")) dtype = kwargs.pop("dtype", None) - - if isinstance(dtype, np.dtype): - from loopy.tools import PicklableDtype - kwargs["picklable_dtype"] = PicklableDtype(dtype) - else: - kwargs["picklable_dtype"] = dtype + from loopy.types import to_loopy_type + kwargs["dtype"] = to_loopy_type( + dtype, allow_auto=True, allow_none=True) Record.__init__(self, **kwargs) - def get_copy_kwargs(self, **kwargs): - result = Record.get_copy_kwargs(self, **kwargs) - if "dtype" not in result: - result["dtype"] = self.dtype - - del result["picklable_dtype"] - - return result - - @property - def dtype(self): - from loopy.tools import PicklableDtype - if isinstance(self.picklable_dtype, PicklableDtype): - return self.picklable_dtype.dtype - else: - return self.picklable_dtype - class GlobalArg(ArrayBase, KernelArgument): min_target_axes = 0 @@ -251,11 +231,9 @@ class ImageArg(ArrayBase, KernelArgument): class ValueArg(KernelArgument): def __init__(self, name, dtype=None, approximately=1000): - from loopy.tools import PicklableDtype - if dtype is not None and not isinstance(dtype, PicklableDtype): - dtype = np.dtype(dtype) - - KernelArgument.__init__(self, name=name, dtype=dtype, + from loopy.types import to_loopy_type + KernelArgument.__init__(self, name=name, + dtype=to_loopy_type(dtype, allow_auto=True, allow_none=True), approximately=approximately) def __str__(self): @@ -660,6 +638,8 @@ class InstructionBase(Record): result.append("priority=%d" % self.priority) if self.tags: result.append("tags=%s" % ":".join(self.tags)) + if self.atomicity: + result.append("atomic=%s" % ":".join(str(a) for a in self.atomicity)) return result @@ -739,6 +719,148 @@ def _get_assignee_and_index(expr): raise RuntimeError("invalid lvalue '%s'" % expr) +class memory_ordering: + """Ordering of atomic operations, defined as in C11 and OpenCL. + + .. attribute:: relaxed + .. attribute:: acquire + .. attribute:: release + .. attribute:: acq_rel + .. attribute:: seq_cst + """ + + relaxed = 0 + acquire = 1 + release = 2 + acq_rel = 3 + seq_cst = 4 + + @staticmethod + def to_string(v): + for i in dir(memory_ordering): + if i.startswith("_"): + continue + + if getattr(memory_ordering, i) == v: + return i + + raise ValueError("Unknown value of memory_ordering") + + +class memory_scope: + """Scope of atomicity, defined as in OpenCL. + + .. attribute:: auto + + Scope matches the accessibility of the variable. + + .. attribute:: work_item + .. attribute:: work_group + .. attribute:: work_device + .. attribute:: all_svm_devices + """ + + work_item = 0 + work_group = 1 + device = 2 + all_svm_devices = 2 + + auto = -1 + + @staticmethod + def to_string(v): + for i in dir(memory_scope): + if i.startswith("_"): + continue + + if getattr(memory_scope, i) == v: + return i + + raise ValueError("Unknown value of memory_scope") + + +class VarAtomicity(object): + """A base class for the description of how atomic access to :attr:`var_name` + shall proceed. + + .. attribute:: var_name + """ + + def __init__(self, var_name): + self.var_name = var_name + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.var_name) + + def __eq__(self, other): + return (type(self) == type(other) + and self.var_name == other.var_name) + + def __ne__(self, other): + return not self.__eq__(other) + + +class AtomicInit(VarAtomicity): + """Describes initialization of an atomic variable. A subclass of + :class:`VarAtomicity`. + """ + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + super(AtomicInit, self).update_persistent_hash(key_hash, key_builder) + key_builder.rec(key_hash, "AtomicInit") + + def __str__(self): + return "update[%s]%s/%s" % ( + self.var_name, + memory_ordering.to_string(self.ordering), + memory_scope.to_string(self.scope)) + + +class AtomicUpdate(VarAtomicity): + """Properties of an atomic operation. A subclass of :class:`VarAtomicity`. + + .. attribute:: ordering + + One of the values from :class:`memory_ordering` + + .. attribute:: scope + + One of the values from :class:`memory_scope` + """ + + ordering = memory_ordering.seq_cst + scope = memory_scope.auto + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + super(AtomicUpdate, self).update_persistent_hash(key_hash, key_builder) + key_builder.rec(key_hash, "AtomicUpdate") + key_builder.rec(key_hash, self.ordering) + key_builder.rec(key_hash, self.scope) + + def __eq__(self, other): + return (super(AtomicUpdate, self).__eq__(other) + and self.ordering == other.ordering + and self.scope == other.scope) + + def __str__(self): + return "update[%s]%s/%s" % ( + self.var_name, + memory_ordering.to_string(self.ordering), + memory_scope.to_string(self.scope)) + + # {{{ assignment class Assignment(InstructionBase): @@ -755,11 +877,45 @@ class Assignment(InstructionBase): if not *None*, a type that will be assigned to the new temporary variable created from the assignee + .. attribute:: atomicity + + A tuple of instances of :class:`VarAtomicity`. Together, they describe + to what extent the assignment is to be carried out in a way that + involves atomic operations. + + To describe an atomic update, any memory reads of *exact* occurrences + of the left-hand side expression of the assignment in the right hand + side are treated , together with the "memory write" part of the + assignment, as part of one single atomic update. + + .. note:: + + Exact identity of the LHS with RHS subexpressions is required for + an atomic update to be recognized. For example, the following update + will not be recognized as an update:: + + z[i] = z[i+1-1] + a {atomic} + + :mod:`loopy` may to evaluate the right-hand side *multiple times* + as part of a single assignment. It is up to the user to ensure that + this retains correct semantics. + + For example, the following assignment:: + + z[i] = f(z[i]) + a {atomic} + + may generate the following (pseudo-)code:: + + DO + READ ztemp_old = z[i] + EVALUATE ztemp_new = f(ztemp_old) + a + WHILE compare_and_swap(z[i], ztemp_new, ztemp_old) did not succeed + .. automethod:: __init__ """ fields = InstructionBase.fields | \ - set("assignee expression temp_var_type".split()) + set("assignee expression temp_var_type atomicity".split()) def __init__(self, assignee, expression, @@ -771,7 +927,8 @@ class Assignment(InstructionBase): forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), boostable=None, boostable_into=None, tags=None, - temp_var_type=None, priority=0, predicates=frozenset(), + temp_var_type=None, atomicity=(), + priority=0, predicates=frozenset(), insn_deps=None, insn_deps_is_final=None): InstructionBase.__init__(self, @@ -807,6 +964,7 @@ class Assignment(InstructionBase): self.assignee = assignee self.expression = expression self.temp_var_type = temp_var_type + self.atomicity = atomicity # {{{ implement InstructionBase interface diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index d1176bc4af1d99b62860364702549b9a1b6b7eda..4d92316bf62eea1f1645a731fd2556294cd3f175 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -28,6 +28,7 @@ import numpy as np from loopy.symbolic import FunctionIdentifier from loopy.diagnostic import LoopyError +from loopy.types import NumpyType class ReductionOperation(object): @@ -122,7 +123,7 @@ class ProductReductionOperation(ScalarReductionOperation): def get_le_neutral(dtype): """Return a number y that satisfies (x <= y) for all y.""" - if dtype.kind == "f": + if dtype.numpy_dtype.kind == "f": # OpenCL 1.1, section 6.11.2 return var("INFINITY") else: @@ -153,17 +154,17 @@ ARGEXT_STRUCT_DTYPES = {} class _ArgExtremumReductionOperation(ReductionOperation): def prefix(self, dtype): - return "loopy_arg%s_%s" % (self.which, dtype.type.__name__) + return "loopy_arg%s_%s" % (self.which, dtype.numpy_dtype.type.__name__) def result_dtype(self, target, dtype, inames): try: return ARGEXT_STRUCT_DTYPES[dtype] except KeyError: struct_dtype = np.dtype([("value", dtype), ("index", np.int32)]) - ARGEXT_STRUCT_DTYPES[dtype] = struct_dtype + ARGEXT_STRUCT_DTYPES[dtype] = NumpyType(struct_dtype, target) target.get_or_register_dtype(self.prefix(dtype)+"_result", struct_dtype) - return struct_dtype + return ARGEXT_STRUCT_DTYPES[dtype] def neutral_element(self, dtype, inames): return ArgExtFunction(self, dtype, "init", inames)() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 4c75cfd250807c6959c1e5167465d34b029d762e..e30d3bcb3f441036bab6558bc2d8691031a1c2e4 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -43,7 +43,7 @@ def prepare_for_caching(kernel): new_args = [] for arg in kernel.args: - dtype = arg.picklable_dtype + dtype = arg.dtype if dtype is not None and dtype is not lp.auto: dtype = dtype.with_target(kernel.target) @@ -51,7 +51,7 @@ def prepare_for_caching(kernel): new_temporary_variables = {} for name, temp in six.iteritems(kernel.temporary_variables): - dtype = temp.picklable_dtype + dtype = temp.dtype if dtype is not None and dtype is not lp.auto: dtype = dtype.with_target(kernel.target) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index d931ebf3373b7ab455787f5be7db7360cc042e66..6a596454018802c0a5a1e837272c106ffa7c55b3 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -37,6 +37,7 @@ from loopy.expression import dtype_to_type_context, TypeInferenceMapper from loopy.diagnostic import LoopyError from loopy.tools import is_integer +from loopy.types import LoopyType # {{{ C code mapper @@ -55,6 +56,8 @@ class LoopyCCodeMapper(RecursiveMapper): def infer_type(self, expr): result = self.type_inf_mapper(expr) + assert isinstance(result, LoopyType) + self.codegen_state.seen_dtypes.add(result) return result @@ -91,11 +94,11 @@ class LoopyCCodeMapper(RecursiveMapper): actual_type = self.infer_type(expr) - if (actual_type.kind == "c" and needed_dtype.kind == "c" + if (actual_type.is_complex() and needed_dtype.is_complex() and actual_type != needed_dtype): result = RecursiveMapper.rec(self, expr, PREC_NONE, type_context) return "%s_cast(%s)" % (self.complex_type_name(needed_dtype), result) - elif actual_type.kind != "c" and needed_dtype.kind == "c": + elif not actual_type.is_complex() and needed_dtype.is_complex(): result = RecursiveMapper.rec(self, expr, PREC_NONE, type_context) return "%s_fromreal(%s)" % (self.complex_type_name(needed_dtype), result) else: @@ -459,9 +462,13 @@ class LoopyCCodeMapper(RecursiveMapper): # {{{ deal with complex-valued variables def complex_type_name(self, dtype): - if dtype == np.complex64: + from loopy.types import NumpyType + if not isinstance(dtype, NumpyType): + raise LoopyError("'%s' is not a complex type" % dtype) + + if dtype.dtype == np.complex64: return "cfloat" - if dtype == np.complex128: + if dtype.dtype == np.complex128: return "cdouble" else: raise RuntimeError @@ -484,17 +491,20 @@ class LoopyCCodeMapper(RecursiveMapper): return base_impl(expr, enclosing_prec, type_context) tgt_dtype = self.infer_type(expr) - is_complex = tgt_dtype.kind == 'c' + is_complex = tgt_dtype.is_complex() if not is_complex: return base_impl(expr, enclosing_prec, type_context) else: tgt_name = self.complex_type_name(tgt_dtype) - reals = [child for child in expr.children - if 'c' != self.infer_type(child).kind] - complexes = [child for child in expr.children - if 'c' == self.infer_type(child).kind] + reals = [] + complexes = [] + for child in expr.children: + if self.infer_type(child).is_complex(): + complexes.append(child) + else: + reals.append(child) real_sum = self.join_rec(" + ", reals, PREC_SUM, type_context) @@ -534,17 +544,20 @@ class LoopyCCodeMapper(RecursiveMapper): return base_impl(expr, enclosing_prec, type_context) tgt_dtype = self.infer_type(expr) - is_complex = 'c' == tgt_dtype.kind + is_complex = tgt_dtype.is_complex() if not is_complex: return base_impl(expr, enclosing_prec, type_context) else: tgt_name = self.complex_type_name(tgt_dtype) - reals = [child for child in expr.children - if 'c' != self.infer_type(child).kind] - complexes = [child for child in expr.children - if 'c' == self.infer_type(child).kind] + reals = [] + complexes = [] + for child in expr.children: + if self.infer_type(child).is_complex(): + complexes.append(child) + else: + reals.append(child) real_prd = self.join_rec(" * ", reals, PREC_PRODUCT, type_context) @@ -574,7 +587,8 @@ class LoopyCCodeMapper(RecursiveMapper): # analogous to ^{-1} denom = self.rec(expr.denominator, PREC_POWER, type_context) - if n_dtype.kind not in "fc" and d_dtype.kind not in "fc": + if (n_dtype.kind not in "fc" + and d_dtype.kind not in "fc"): # must both be integers if type_context == "f": num = "((float) (%s))" % num @@ -592,8 +606,8 @@ class LoopyCCodeMapper(RecursiveMapper): denom), enclosing_prec, PREC_PRODUCT) - n_dtype = self.infer_type(expr.numerator) - d_dtype = self.infer_type(expr.denominator) + n_dtype = self.infer_type(expr.numerator).numpy_dtype + d_dtype = self.infer_type(expr.denominator).numpy_dtype if not self.allow_complex: return base_impl(expr, enclosing_prec, type_context) @@ -623,7 +637,7 @@ class LoopyCCodeMapper(RecursiveMapper): def map_remainder(self, expr, enclosing_prec, type_context): tgt_dtype = self.infer_type(expr) - if 'c' == tgt_dtype.kind: + if tgt_dtype.is_complex(): raise RuntimeError("complex remainder not defined") return "(%s %% %s)" % ( diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 55f8da4d608d7a1b3a1ca8ba960c498586f4d76d..2fa510b5f83887026697e032a8b463c5df8ed2d1 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -26,10 +26,12 @@ THE SOFTWARE. import numpy as np +from pytools import memoize_method + from loopy.target.c import CTarget from loopy.target.c.codegen.expression import LoopyCCodeMapper -from pytools import memoize_method from loopy.diagnostic import LoopyError +from loopy.types import NumpyType # {{{ vector types @@ -141,10 +143,12 @@ class LoopyCudaCCodeMapper(LoopyCCodeMapper): @staticmethod def _get_index_ctype(kernel): - if kernel.index_dtype == np.int32: + if kernel.index_dtype.numpy_dtype == np.int32: return "int32_t" - else: + elif kernel.index_dtype.numpy_dtype == np.int32: return "int64_t" + else: + raise LoopyError("unexpected index type") def map_group_hw_index(self, expr, enclosing_prec, type_context): return "((%s) blockIdx.%s)" % ( @@ -198,10 +202,10 @@ class CudaTarget(CTarget): return result def is_vector_dtype(self, dtype): - return list(vec.types.values()) + return dtype.numpy_dtype in list(vec.types.values()) def vector_dtype(self, base, count): - return vec.types[base, count] + return NumpyType(vec.types[base.numpy_dtype, count]) # }}} diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index b0b1c247ec78a29b75890b0e9f04448116b97c4e..217678e8218c7fe452cf118175d127866456069f 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -37,9 +37,9 @@ from pytools import memoize_method class LoopyISPCCodeMapper(LoopyCCodeMapper): def _get_index_ctype(self): - if self.kernel.index_dtype == np.int32: + if self.kernel.index_dtype.numpy_dtype == np.int32: return "int32" - elif self.kernel.index_dtype == np.int64: + elif self.kernel.index_dtype.numpy_dtype == np.int64: return "int64" else: raise ValueError("unexpected index_type") diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index cfdc8620bb4a383e4d48f004a2c682ba8e495a05..db53b7cb29224f8620bcc6466b3f42da5a52107f 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -30,6 +30,7 @@ from loopy.target.c import CTarget from loopy.target.c.codegen.expression import LoopyCCodeMapper from pytools import memoize_method from loopy.diagnostic import LoopyError +from loopy.types import NumpyType # {{{ vector types @@ -126,8 +127,8 @@ def opencl_function_mangler(kernel, name, arg_dtypes): return arg_dtypes[0], name if name == "dot": - scalar_dtype, offset, field_name = arg_dtypes[0].fields["s0"] - return scalar_dtype, name + scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] + return NumpyType(scalar_dtype), name return None @@ -192,6 +193,17 @@ class OpenCLTarget(CTarget): """A target for the OpenCL C heterogeneous compute programming language. """ + def __init__(self, atomics_flavor="cl2"): + """ + :arg atomics_flavor: one of ``"cl2"`` (C11-style atomics from OpenCL 2.0), + ``"cl1"`` (OpenCL 1.1 atomics, using bit-for-bit compare-and-swap + for floating point), ``"cl1-exch"`` (OpenCL 1.1 atomics, using + double-exchange for floating point). + """ + super(OpenCLTarget, self).__init__() + + self.atomics_flavor = atomics_flavor + # {{{ library def function_manglers(self): @@ -231,10 +243,10 @@ class OpenCLTarget(CTarget): return result def is_vector_dtype(self, dtype): - return list(vec.types.values()) + return dtype.numpy_dtype in list(vec.types.values()) def vector_dtype(self, base, count): - return vec.types[base, count] + return NumpyType(vec.types[base.numpy_dtype, count]) # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 2947fdc6c5d17e919bdcc329f6243376b00a9969..e14a1dd9d5d74bdabc30e8f70382738d4db4bbbe 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -30,6 +30,7 @@ from six.moves import range import numpy as np from loopy.target.opencl import OpenCLTarget +from loopy.types import NumpyType import logging logger = logging.getLogger(__name__) @@ -181,10 +182,10 @@ def pyopencl_function_mangler(target, name, arg_dtypes): if len(arg_dtypes) == 1 and isinstance(name, str): arg_dtype, = arg_dtypes - if arg_dtype.kind == "c": - if arg_dtype == np.complex64: + if arg_dtype.is_complex(): + if arg_dtype.numpy_dtype == np.complex64: tpname = "cfloat" - elif arg_dtype == np.complex128: + elif arg_dtype.numpy_dtype == np.complex128: tpname = "cdouble" else: raise RuntimeError("unexpected complex type '%s'" % arg_dtype) @@ -196,7 +197,10 @@ def pyopencl_function_mangler(target, name, arg_dtypes): return arg_dtype, "%s_%s" % (tpname, name) if name in ["real", "imag", "abs"]: - return np.dtype(arg_dtype.type(0).real), "%s_%s" % (tpname, name) + return ( + NumpyType( + np.dtype(arg_dtype.numpy_dtype.type(0).real)), + "%s_%s" % (tpname, name)) return None @@ -207,10 +211,12 @@ def pyopencl_preamble_generator(target, seen_dtypes, seen_functions): has_double = False has_complex = False + from loopy.types import NumpyType for dtype in seen_dtypes: - if dtype in [np.float64, np.complex128]: + if (isinstance(dtype, NumpyType) + and dtype.dtype in [np.float64, np.complex128]): has_double = True - if dtype.kind == "c": + if dtype.involves_complex(): has_complex = True if has_complex: @@ -294,7 +300,7 @@ class PyOpenCLTarget(OpenCLTarget): def vector_dtype(self, base, count): from pyopencl.array import vec - return vec.types[base, count] + return NumpyType(vec.types[base.numpy_dtype, count]) def alignment_requirement(self, type_decl): import struct diff --git a/loopy/tools.py b/loopy/tools.py index 777532e7af92bc62e6878f564c8c5545f4cb2c4a..ae370d5aaac9ff75f530e1d0951a2f904b686e42 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -1,5 +1,4 @@ -from __future__ import division -from __future__ import absolute_import +from __future__ import division, absolute_import import six __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -35,7 +34,7 @@ from six.moves import intern if six.PY2: def is_integer(obj): - return isinstance(obj, (int, long, np.integer)) + return isinstance(obj, (int, long, np.integer)) # noqa else: def is_integer(obj): return isinstance(obj, (int, np.integer)) @@ -114,70 +113,6 @@ class PymbolicExpressionHashWrapper(object): # }}} -# {{{ picklable dtype - -class PicklableDtype(object): - """This object works around several issues with pickling :class:`numpy.dtype` - objects. It does so by serving as a picklable wrapper around the original - dtype. - - The issues are the following - - - :class:`numpy.dtype` objects for custom types in :mod:`loopy` are usually - registered in the target's dtype registry. This registration may - have been lost after unpickling. This container restores it implicitly, - as part of unpickling. - - - There is a`numpy bug <https://github.com/numpy/numpy/issues/4317>`_ - that prevents unpickled dtypes from hashing properly. This is solved - by retrieving the 'canonical' type from the dtype registry. - """ - - def __init__(self, dtype, target=None): - assert not isinstance(dtype, PicklableDtype) - - if dtype is None: - raise TypeError("may not pass None to construct PicklableDtype") - - self.target = target - self.dtype = np.dtype(dtype) - - def __hash__(self): - return hash(self.dtype) - - def __eq__(self, other): - return ( - type(self) == type(other) - and self.dtype == other.dtype) - - def __ne__(self, other): - return not self.__eq__(self, other) - - def __getstate__(self): - if self.target is None: - raise RuntimeError("unable to pickle dtype: target not known") - - c_name = self.target.dtype_to_typename(self.dtype) - return (self.target, c_name, self.dtype) - - def __setstate__(self, state): - target, name, dtype = state - self.target = target - self.dtype = self.target.get_or_register_dtype([name], dtype) - - def with_target(self, target): - if (self.target is not None - and target is not self.target): - raise RuntimeError("target already set to different value") - - return PicklableDtype(self.dtype, target) - - def assert_has_target(self): - assert self.target is not None - -# }}} - - # {{{ remove common indentation def remove_common_indentation(code, require_leading_newline=True, diff --git a/loopy/types.py b/loopy/types.py new file mode 100644 index 0000000000000000000000000000000000000000..4ed6aa0ffe6aab8ec9fbdde5721b13658d9059cb --- /dev/null +++ b/loopy/types.py @@ -0,0 +1,212 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six # noqa +import numpy as np + +from loopy.diagnostic import LoopyError + + +class LoopyType(object): + def with_target(self, target): + return self + + def is_integral(self): + raise NotImplementedError() + + def is_complex(self): + raise NotImplementedError() + + def uses_complex(self): + raise NotImplementedError() + + def is_composite(self): + raise NotImplementedError() + + @property + def itemsize(self): + raise NotImplementedError() + + @property + def numpy_dtype(self): + raise ValueError("'%s' is not a numpy type" + % str(self)) + + +class AtomicType(LoopyType): + pass + + +# {{{ numpy-based dtype + +class NumpyType(LoopyType): + """This object works around several issues with pickling :class:`numpy.dtype` + objects. It does so by serving as a picklable wrapper around the original + dtype. + + The issues are the following + + - :class:`numpy.dtype` objects for custom types in :mod:`loopy` are usually + registered in the target's dtype registry. This registration may + have been lost after unpickling. This container restores it implicitly, + as part of unpickling. + + - There is a`numpy bug <https://github.com/numpy/numpy/issues/4317>`_ + that prevents unpickled dtypes from hashing properly. This is solved + by retrieving the 'canonical' type from the dtype registry. + """ + + def __init__(self, dtype, target=None): + assert not isinstance(dtype, NumpyType) + + if dtype is None: + raise TypeError("may not pass None to construct NumpyType") + + if dtype == object: + raise TypeError("loopy does not directly support object arrays") + + self.target = target + self.dtype = np.dtype(dtype) + + def __hash__(self): + return hash(self.dtype) + + def __eq__(self, other): + return ( + type(self) == type(other) + and self.dtype == other.dtype) + + def __ne__(self, other): + return not super(NumpyType, self).__eq__(other) + + def __getstate__(self): + if self.target is None: + raise RuntimeError("unable to pickle dtype: target not known") + + c_name = self.target.dtype_to_typename(self.dtype) + return (self.target, c_name, self.dtype) + + def __setstate__(self, state): + target, name, dtype = state + self.target = target + self.dtype = self.target.get_or_register_dtype([name], dtype) + + def with_target(self, target): + if (self.target is not None + and target is not self.target): + raise RuntimeError("target already set to different value") + + return type(self)(self.dtype, target) + + def assert_has_target(self): + assert self.target is not None + + def is_integral(self): + return self.dtype.kind in "iu" + + def is_complex(self): + return self.dtype.kind == "c" + + def involves_complex(self): + def dtype_involves_complex(dtype): + if dtype.kind == "c": + return True + + if dtype.fields is None: + return False + else: + return any( + dtype_involves_complex(f[0]) + for f in six.itervalues(dtype.fields)) + + return dtype_involves_complex(self.dtype) + + def is_composite(self): + return self.dtype.kind == "V" + + @property + def itemsize(self): + return self.dtype.itemsize + + @property + def numpy_dtype(self): + return self.dtype + + def __repr__(self): + return repr(self.dtype) + +# }}} + + +# {{{ atomic dtype + +class AtomicNumpyType(NumpyType, AtomicType): + """A dtype wrapper that indicates that the described type should be capable + of atomic operations. + """ + def __hash__(self): + return 0xa7031c ^ hash(self.dtype) + + def __repr__(self): + return "atomic:%s" % repr(self.dtype) + +# }}} + + +def to_loopy_type(dtype, allow_none=False, allow_auto=False, for_atomic=False): + from loopy.kernel.data import auto + if allow_none and dtype is None: + return dtype + elif allow_auto and dtype is auto: + return dtype + + numpy_dtype = None + + if dtype is not None: + try: + numpy_dtype = np.dtype(dtype) + except Exception: + pass + + if isinstance(dtype, LoopyType): + if for_atomic: + if isinstance(dtype, NumpyType): + return AtomicNumpyType(dtype.dtype) + elif not isinstance(dtype, AtomicType): + raise LoopyError("do not know how to convert '%s' to an atomic type" + % dtype) + + return dtype + + elif numpy_dtype is not None: + if for_atomic: + return AtomicNumpyType(numpy_dtype) + else: + return NumpyType(numpy_dtype) + + else: + raise TypeError("dtype must be a LoopyType, or convertible to one, " + "found '%s' instead" % type(dtype)) + +# vim: foldmethod=marker diff --git a/loopy/version.py b/loopy/version.py index adc069663503b200bcdd1638c05ae0ffae5f14df..6cbbc59e17b1e6940a1c3247f1bf7b60d5a0ef61 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v19-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v21-islpy%s" % _islpy_version diff --git a/test/test_loopy.py b/test/test_loopy.py index 606eec7667ca3d76a215c3b487e8c93bc371c36e..c7d7755e7a7b51d8085d57b7263b0249bd9520e1 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2421,6 +2421,26 @@ def test_chunk_iname(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=130)) +def test_atomic(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{ [i]: 0<=i<n }", + "out[i%20] = out[i%20] + 2*a[i] {atomic}", + [ + lp.GlobalArg("out", np.float32, shape=lp.auto, + for_atomic=True), + lp.GlobalArg("a", np.float32, shape=lp.auto), + "..." + ], + assumptions="n>0") + + ref_knl = knl + knl = lp.chunk_iname(knl, "i", 3, inner_tag="l.0") + knl = lp.set_loop_priority(knl, "i_outer, i_inner") + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=130)) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])