diff --git a/.gitignore b/.gitignore index 8c42a2c713dda5ef85e52503120dde5a04af2fa6..322e3a7caa7f229bccba899cb9bc993d95364b86 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ distribute*tar.gz *.log *profiler.conf core +.coverage +htmlcov diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..504e23cf344e2d5ae35f6f6abe97458b8c7a39b8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "loopy/target/opencl/compyte"] + path = loopy/target/opencl/compyte + url = https://github.com/inducer/compyte diff --git a/README.rst b/README.rst index 6390e8afa4988d7af778375c633a438372d5220e..0e551fbede0460a2e7c76167b54d672afdf81286 100644 --- a/README.rst +++ b/README.rst @@ -43,6 +43,11 @@ It is not (and does not want to be) a general-purpose programming language. Loopy is licensed under the liberal `MIT license <http://en.wikipedia.org/wiki/MIT_License>`_ and free for commercial, academic, -and private use. To use Loopy, you just need `pyopencl -<http://mathema.tician.de/software/pyopencl>`_. +and private use. All of Loopy's dependencies can be automatically installed from +the package index after using:: + + pip install loo.py + +In addition, Loopy is compatible with and enhances +`pyopencl <http://mathema.tician.de/software/pyopencl>`_. diff --git a/loopy/__init__.py b/loopy/__init__.py index bb397db32224fc761b2b5bbeed35f9c1fbf834da..6ef10b3408e9954b16c24af88ec526e59158da0d 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -37,12 +37,7 @@ from loopy.diagnostic import LoopyError # {{{ imported user interface from loopy.library.function import ( - default_function_mangler, single_arg_function_mangler, - opencl_function_mangler) - -from loopy.library.preamble import default_preamble_generator - -from loopy.library.symbol import opencl_symbol_mangler + default_function_mangler, single_arg_function_mangler) from loopy.kernel.data import ( auto, @@ -81,8 +76,6 @@ __all__ = [ "ExpressionInstruction", "CInstruction", "default_function_mangler", "single_arg_function_mangler", - "opencl_function_mangler", "opencl_symbol_mangler", - "default_preamble_generator", "make_kernel", "UniqueName", diff --git a/loopy/auto_test.py b/loopy/auto_test.py index cc36f3c525194fbc3418b0962835c9ca77af8929..f73183526f29eb9c559d9479e5d1bddf322e45f5 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -420,7 +420,7 @@ def auto_test_vs_ref( ref_queue = cl.CommandQueue(ref_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) - pp_ref_knl = lp.preprocess_kernel(ref_knl, device=dev) + pp_ref_knl = lp.preprocess_kernel(ref_knl) for knl in lp.generate_loop_schedules(pp_ref_knl): ref_sched_kernel = knl @@ -514,7 +514,7 @@ def auto_test_vs_ref( if test_knl.state not in [ kernel_state.PREPROCESSED, kernel_state.SCHEDULED]: - test_knl = lp.preprocess_kernel(test_knl, device=ctx.devices[0]) + test_knl = lp.preprocess_kernel(test_knl) if not test_knl.schedule: test_kernels = lp.generate_loop_schedules(test_knl) diff --git a/loopy/check.py b/loopy/check.py index d3fecda62d69de5ba134a16556cc8cdbce5a7ccf..09ca0c576b6b3a2bca8cc19b384c8ff56e8d20c2 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -67,7 +67,7 @@ def check_insn_attributes(kernel): def check_loop_priority_inames_known(kernel): for iname in kernel.loop_priority: - if not iname in kernel.all_inames(): + if iname not in kernel.all_inames(): raise LoopyError("unknown iname '%s' in loop priorities" % iname) @@ -376,54 +376,6 @@ def pre_schedule_checks(kernel): # {{{ pre-code-generation checks -def check_sizes(kernel, device): - import loopy as lp - - from loopy.diagnostic import LoopyAdvisory - - parameters = {} - for arg in kernel.args: - if isinstance(arg, lp.ValueArg) and arg.approximately is not None: - parameters[arg.name] = arg.approximately - - glens, llens = kernel.get_grid_sizes_as_exprs() - - if (max(len(glens), len(llens)) - > device.max_work_item_dimensions): - raise LoopyError("too many work item dimensions") - - from pymbolic import evaluate - from pymbolic.mapper.evaluator import UnknownVariableError - try: - glens = evaluate(glens, parameters) - llens = evaluate(llens, parameters) - except UnknownVariableError as name: - from warnings import warn - warn("could not check axis bounds because no value " - "for variable '%s' was passed to check_kernels()" - % name, LoopyAdvisory) - else: - for i in range(len(llens)): - if llens[i] > device.max_work_item_sizes[i]: - raise LoopyError("group axis %d too big" % i) - - from pytools import product - if product(llens) > device.max_work_group_size: - raise LoopyError("work group too big") - - from pyopencl.characterize import usable_local_mem_size - if kernel.local_mem_use() > usable_local_mem_size(device): - raise LoopyError("using too much local memory") - - from loopy.kernel.data import ConstantArg - const_arg_count = sum( - 1 for arg in kernel.args - if isinstance(arg, ConstantArg)) - - if const_arg_count > device.max_constant_args: - raise LoopyError("too many constant arguments") - - def check_that_shapes_and_strides_are_arguments(kernel): from loopy.kernel.data import ValueArg from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag @@ -460,18 +412,11 @@ def check_that_shapes_and_strides_are_arguments(kernel): arg.name, ", ".join(deps-integer_arg_names))) -def pre_codegen_checks(kernel, device=None): +def pre_codegen_checks(kernel): try: logger.info("pre-codegen check %s: start" % kernel.name) - if device is not None: - check_sizes(kernel, device) - else: - from loopy.diagnostic import warn - warn(kernel, "no_device_in_pre_codegen_checks", - "No device parameter was passed to loopy.pre_codegen_checks. " - "Perhaps you want to pass a device argument to generate_code.") - + kernel.target.pre_codegen_check(kernel) check_that_shapes_and_strides_are_arguments(kernel) logger.info("pre-codegen check %s: done" % kernel.name) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 0f645c2dde3707890fa33b06b7ac9f064d437ecb..c14c1ffa24157a51c58b31a166913e94a558e062 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -220,11 +220,12 @@ class POD(Declarator): and the *name* is given as a string. """ - def __init__(self, dtype, name): + def __init__(self, target, dtype, name): dtype = np.dtype(dtype) - from pyopencl.tools import dtype_to_ctype - self.ctype = dtype_to_ctype(dtype) + self.target = target + self.ctype = target.dtype_to_typename(dtype) + self.dtype = dtype self.name = name def get_decl_pair(self): @@ -233,17 +234,11 @@ class POD(Declarator): def struct_maker_code(self, name): return name - @property - def dtype(self): - from pyopencl.tools import NAME_TO_DTYPE - return NAME_TO_DTYPE[self.ctype] - def struct_format(self): return self.dtype.char def alignment_requirement(self): - import pyopencl._pvt_struct as _struct - return _struct.calcsize(self.struct_format()) + return self.target.alignment_requirement(self) def default_value(self): return 0 @@ -297,7 +292,7 @@ class ImplementedDataInfo(Record): .. attribute:: allows_offset """ - def __init__(self, name, dtype, cgen_declarator, arg_class, + def __init__(self, target, name, dtype, cgen_declarator, arg_class, base_name=None, shape=None, strides=None, unvec_shape=None, unvec_strides=None, @@ -308,7 +303,7 @@ class ImplementedDataInfo(Record): Record.__init__(self, name=name, - picklable_dtype=PicklableDtype(dtype), + picklable_dtype=PicklableDtype(dtype, target=target), cgen_declarator=cgen_declarator, arg_class=arg_class, base_name=base_name, @@ -338,6 +333,11 @@ code_gen_cache = PersistentDict("loopy-code-gen-cache-v3-"+DATA_MODEL_VERSION, # {{{ main code generation entrypoint def generate_code(kernel, device=None): + if device is not None: + from warnings import warn + warn("passing 'device' to generate_code() is deprecated", + DeprecationWarning, stacklevel=2) + if kernel.schedule is None: from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) @@ -351,14 +351,9 @@ def generate_code(kernel, device=None): from loopy import CACHING_ENABLED if CACHING_ENABLED: - if device is not None: - device_id = device.persistent_unique_id - else: - device_id = None - - code_gen_cache_key = (kernel, device_id) + input_kernel = kernel try: - result = code_gen_cache[code_gen_cache_key] + result = code_gen_cache[input_kernel] logger.info("%s: code generation cache hit" % kernel.name) return result except KeyError: @@ -370,7 +365,7 @@ def generate_code(kernel, device=None): kernel = infer_unknown_types(kernel, expect_completion=True) from loopy.check import pre_codegen_checks - pre_codegen_checks(kernel, device=device) + pre_codegen_checks(kernel) from cgen import (FunctionBody, FunctionDeclaration, Value, Module, Block, @@ -407,14 +402,16 @@ def generate_code(kernel, device=None): if isinstance(arg, ArrayBase): impl_arg_info.extend( arg.decl_info( + kernel.target, is_written=arg.name in kernel.get_written_variables(), index_dtype=kernel.index_dtype)) elif isinstance(arg, ValueArg): impl_arg_info.append(ImplementedDataInfo( + target=kernel.target, name=arg.name, dtype=arg.dtype, - cgen_declarator=Const(POD(arg.dtype, arg.name)), + cgen_declarator=Const(POD(kernel.target, arg.dtype, arg.name)), arg_class=ValueArg)) else: @@ -427,12 +424,11 @@ def generate_code(kernel, device=None): # }}} - from pyopencl.tools import dtype_to_ctype mod.extend([ LiteralLines(r""" #define lid(N) ((%(idx_ctype)s) get_local_id(N)) #define gid(N) ((%(idx_ctype)s) get_group_id(N)) - """ % dict(idx_ctype=dtype_to_ctype(kernel.index_dtype))), + """ % dict(idx_ctype=kernel.target.dtype_to_typename(kernel.index_dtype))), Line()]) # {{{ build lmem array declarators for temporary variables @@ -441,6 +437,7 @@ def generate_code(kernel, device=None): idi.cgen_declarator for tv in six.itervalues(kernel.temporary_variables) for idi in tv.decl_info( + kernel.target, is_written=True, index_dtype=kernel.index_dtype)) # }}} @@ -478,8 +475,11 @@ def generate_code(kernel, device=None): seen_dtypes.add(tv.dtype) preambles = kernel.preambles[:] - for prea_gen in kernel.preamble_generators: - preambles.extend(prea_gen(seen_dtypes, seen_functions)) + + preamble_generators = (kernel.preamble_generators + + kernel.target.preamble_generators()) + for prea_gen in preamble_generators: + preambles.extend(prea_gen(kernel.target, seen_dtypes, seen_functions)) seen_preamble_tags = set() dedup_preambles = [] @@ -506,7 +506,9 @@ def generate_code(kernel, device=None): result = result, impl_arg_info - code_gen_cache[code_gen_cache_key] = result + if CACHING_ENABLED: + code_gen_cache[input_kernel] = result + return result # }}} diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index 1963c299571206a20ea0c48ab49b2923968e31bb..bbba284c5d1329ffbd951840fac060b53832e0dd 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -33,8 +33,6 @@ from pymbolic.mapper.stringifier import (PREC_NONE, PREC_CALL, PREC_PRODUCT, PREC_POWER) from pymbolic.mapper import CombineMapper import islpy as isl -import pyopencl as cl -import pyopencl.array # noqa from pytools import Record from loopy.tools import is_integer @@ -179,11 +177,10 @@ class TypeInferenceMapper(CombineMapper): if expr.name in self.kernel.all_inames(): return self.kernel.index_dtype - for mangler in self.kernel.symbol_manglers: - result = mangler(expr.name) - if result is not None: - result_dtype, _ = result - return result_dtype + result = self.kernel.mangle_symbol(expr.name) + if result is not None: + result_dtype, _ = result + return result_dtype obj = self.new_assignments.get(expr.name) @@ -238,7 +235,8 @@ class TypeInferenceMapper(CombineMapper): map_logical_or = map_comparison def map_reduction(self, expr): - return expr.operation.result_dtype(self.rec(expr.expr), expr.inames) + return expr.operation.result_dtype( + self.kernel.target, self.rec(expr.expr), expr.inames) # }}} @@ -251,7 +249,7 @@ class TypeInferenceMapper(CombineMapper): # - 'd' for double-precision floating point # or None for 'no known context'. -def dtype_to_type_context(dtype): +def dtype_to_type_context(target, dtype): dtype = np.dtype(dtype) if dtype.kind == 'i': @@ -260,8 +258,8 @@ def dtype_to_type_context(dtype): return 'd' if dtype in [np.float32, np.complex64]: return 'f' - if dtype in list(cl.array.vec.types.values()): - return dtype_to_type_context(dtype.fields["x"][0]) + if target.is_vector_dtype(dtype): + return dtype_to_type_context(target, dtype.fields["x"][0]) return None @@ -402,11 +400,10 @@ class LoopyCCodeMapper(RecursiveMapper): raise RuntimeError("unsubscripted reference to array '%s'" % expr.name) - for mangler in self.kernel.symbol_manglers: - result = mangler(expr.name) - if result is not None: - _, c_name = result - return c_name + result = self.kernel.mangle_symbol(expr.name) + if result is not None: + _, c_name = result + return c_name return expr.name @@ -447,7 +444,7 @@ class LoopyCCodeMapper(RecursiveMapper): from loopy.kernel.array import get_access_info from pymbolic import evaluate - access_info = get_access_info(ary, expr.index, + access_info = get_access_info(self.kernel.target, ary, expr.index, lambda expr: evaluate(expr, self.var_subst_map)) vec_member = get_opencl_vec_member(access_info.vector_index) @@ -462,7 +459,7 @@ class LoopyCCodeMapper(RecursiveMapper): if ary.dtype == np.float32: return base_access+".x" - if ary.dtype in cl.array.vec.type_to_scalar_and_count: + if self.kernel.target.is_vector_dtype(ary.dtype): return base_access elif ary.dtype == np.float64: return "as_double(%s.xy)" % base_access @@ -667,7 +664,8 @@ class LoopyCCodeMapper(RecursiveMapper): result_dtype, c_name, arg_tgt_dtypes = mangle_result str_parameters = [ - self.rec(par, PREC_NONE, dtype_to_type_context(tgt_dtype), + self.rec(par, PREC_NONE, + dtype_to_type_context(self.kernel.target, tgt_dtype), tgt_dtype) for par, par_dtype, tgt_dtype in zip( expr.parameters, par_dtypes, arg_tgt_dtypes)] @@ -683,7 +681,8 @@ class LoopyCCodeMapper(RecursiveMapper): # not. Using the inferred type as a stopgap for now. str_parameters = [ self.rec(par, PREC_NONE, - type_context=dtype_to_type_context(par_dtype)) + type_context=dtype_to_type_context( + self.kernel.target, par_dtype)) for par, par_dtype in zip(expr.parameters, par_dtypes)] if c_name is None: diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index cc102f0c26834eceee2449cd74e52b94bc19a848..ca1cce69c4cf1f87233c35e3dfc7987940f132fa 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -99,7 +99,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state): result = Assign( lhs_code, ccm(expr, prec=PREC_NONE, - type_context=dtype_to_type_context(target_dtype), + type_context=dtype_to_type_context(kernel.target, target_dtype), needed_dtype=target_dtype)) if kernel.options.trace_assignments or kernel.options.trace_assignment_values: @@ -174,7 +174,7 @@ def generate_c_instruction_code(kernel, insn, codegen_state): body.append( Initializer( - POD(kernel.index_dtype, name), + POD(kernel.target, kernel.index_dtype, name), codegen_state.c_code_mapper( iname_expr, prec=PREC_NONE, type_context="i"))) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index f5e9c9f7fc87e24b47e193e6740a928f695cd3e2..40f433a90f90855aa554261b8d6fb5cbdd5fb0ec 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -369,11 +369,10 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state): else: from loopy.codegen import wrap_in - from pyopencl.tools import dtype_to_ctype result.append(wrap_in(For, "%s %s = %s" - % (dtype_to_ctype(kernel.index_dtype), + % (kernel.target.dtype_to_typename(kernel.index_dtype), loop_iname, ccm(aff_to_expr(static_lbound), PREC_NONE, "i")), "%s <= %s" % ( loop_iname, ccm(aff_to_expr(static_ubound), PREC_NONE, "i")), diff --git a/loopy/compiled.py b/loopy/compiled.py index 14a2ed46a0bb93eeb4c7341702b7ab07ddb52208..7fb686e742c914e87ce07f3a9043308d7ff61c9f 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -384,7 +384,9 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, options): "be supplied\")" % arg.name) gen("") - if is_written and arg.arg_class is lp.ImageArg and not options.skip_arg_checks: + if (is_written + and arg.arg_class is lp.ImageArg + and not options.skip_arg_checks): gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"written image '%s' must " @@ -683,7 +685,7 @@ class CompiledKernel: if kernel.schedule is None: from loopy.preprocess import preprocess_kernel - kernel = preprocess_kernel(kernel, self.context.devices[0]) + kernel = preprocess_kernel(kernel) from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) @@ -695,7 +697,7 @@ class CompiledKernel: kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set) from loopy.codegen import generate_code - code, impl_arg_info = generate_code(kernel, device=self.context.devices[0]) + code, impl_arg_info = generate_code(kernel) if self.kernel.options.write_cl: output = code @@ -733,7 +735,7 @@ class CompiledKernel: kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype) from loopy.codegen import generate_code - code, arg_info = generate_code(kernel, device=self.context.devices[0]) + code, arg_info = generate_code(kernel) return code def get_highlighted_code(self, arg_to_dtype=None): diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 754944cefb68a4f7bdbc2f2aa9f7de29268b66cd..f048f06b8c1087dc45f462b69bcf20596cdd7ee5 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -32,62 +32,6 @@ import islpy as isl from islpy import dim_type -def block_shift_constraint(cns, type, pos, multiple, as_equality=None): - if as_equality != cns.is_equality(): - if as_equality: - factory = isl.Constraint.equality_from_aff - else: - factory = isl.Constraint.inequality_from_aff - - cns = factory(cns.get_aff()) - - cns = cns.set_constant(cns.get_constant() - + cns.get_coefficient(type, pos)*multiple) - - return cns - - -def negate_constraint(cns): - assert not cns.is_equality() - # FIXME hackety hack - my_set = (isl.BasicSet.universe(cns.get_space()) - .add_constraint(cns)) - my_set = my_set.complement() - - results = [] - - def examine_basic_set(s): - s.foreach_constraint(results.append) - - my_set.foreach_basic_set(examine_basic_set) - result, = results - return result - - -def make_index_map(set, index_expr): - from loopy.symbolic import eq_constraint_from_expr - - if not isinstance(index_expr, tuple): - index_expr = (index_expr,) - - amap = isl.Map.from_domain(set).add_dims(dim_type.out, len(index_expr)) - out_names = ["_ary_idx_%d" % i for i in range(len(index_expr))] - - dim = amap.get_space() - all_constraints = tuple( - eq_constraint_from_expr(dim, iexpr_i) - for iexpr_i in index_expr) - - for i, out_name in enumerate(out_names): - amap = amap.set_dim_name(dim_type.out, i, out_name) - - for i, (out_name, constr) in enumerate(zip(out_names, all_constraints)): - constr.set_coefficients_by_name({out_name: -1}) - amap = amap.add_constraint(constr) - - return amap - - def pw_aff_to_aff(pw_aff): if isinstance(pw_aff, isl.Aff): return pw_aff diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 7dd67bc42077088f31bd219cd179709fce959661..db1af79ad110c9e195a3c82acc77ba12f82fe3c2 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -39,12 +39,8 @@ from pytools import UniqueNameGenerator, generate_unique_names from loopy.library.function import ( default_function_mangler, - opencl_function_mangler, single_arg_function_mangler) -from loopy.library.symbol import opencl_symbol_mangler -from loopy.library.preamble import default_preamble_generator - from loopy.diagnostic import CannotBranchDomainTree @@ -146,6 +142,10 @@ class LoopKernel(RecordWithoutPickling): .. attribute:: state A value from :class:`kernel_state`. + + .. attribute:: target + + A subclass of :class:`loopy.target.TargetBase`. """ # {{{ constructor @@ -153,7 +153,7 @@ class LoopKernel(RecordWithoutPickling): def __init__(self, domains, instructions, args=[], schedule=None, name="loopy_kernel", preambles=[], - preamble_generators=[default_preamble_generator], + preamble_generators=[], assumptions=None, local_sizes={}, temporary_variables={}, @@ -161,10 +161,9 @@ class LoopKernel(RecordWithoutPickling): substitutions={}, function_manglers=[ default_function_mangler, - opencl_function_mangler, single_arg_function_mangler, ], - symbol_manglers=[opencl_symbol_mangler], + symbol_manglers=[], iname_slab_increments={}, loop_priority=[], @@ -176,6 +175,7 @@ class LoopKernel(RecordWithoutPickling): options=None, state=kernel_state.INITIAL, + target=None, # When kernels get intersected in slab decomposition, # their grid sizes shouldn't change. This provides @@ -278,15 +278,18 @@ class LoopKernel(RecordWithoutPickling): symbol_manglers=symbol_manglers, index_dtype=index_dtype, options=options, - state=state) + state=state, + target=target) # }}} # {{{ function mangling def mangle_function(self, identifier, arg_dtypes): - for mangler in self.function_manglers: - mangle_result = mangler(identifier, arg_dtypes) + manglers = self.target.function_manglers() + self.function_manglers + + for mangler in manglers: + mangle_result = mangler(self.target, identifier, arg_dtypes) if mangle_result is not None: return mangle_result @@ -294,6 +297,20 @@ class LoopKernel(RecordWithoutPickling): # }}} + # {{{ symbol mangling + + def mangle_symbol(self, identifier): + manglers = self.target.symbol_manglers() + self.symbol_manglers + + for mangler in manglers: + result = mangler(self.target, identifier) + if result is not None: + return result + + return None + + # }}} + # {{{ name wrangling @memoize_method @@ -1085,6 +1102,7 @@ class LoopKernel(RecordWithoutPickling): "silenced_warnings", "options", "state", + "target", ] comparison_fields = hash_fields + [ diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index a14451ba1d6f4f6843c006ff9ab982ec7ebb302e..5503437555f3f81e2856cf512238d7c59589a29f 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -31,9 +31,6 @@ from six import iteritems from pytools import Record, memoize_method -import pyopencl as cl # noqa -import pyopencl.array # noqa - import numpy as np # noqa from loopy.diagnostic import LoopyError @@ -244,6 +241,8 @@ def _parse_array_dim_tag(tag, default_target_axis, nesting_levels): padded_stride_match = PADDED_STRIDE_TAG_RE.match(tag) if padded_stride_match is not None: tag = padded_stride_match.group(1) + + from loopy.symbolic import parse pad_to = parse(padded_stride_match.group(2)) else: pad_to = None @@ -592,7 +591,10 @@ class ArrayBase(Record): if dtype is not None and dtype is not lp.auto: from loopy.tools import PicklableDtype - picklable_dtype = PicklableDtype(dtype) + if not isinstance(dtype, PicklableDtype): + picklable_dtype = PicklableDtype(dtype) + else: + picklable_dtype = dtype if picklable_dtype.dtype == object: raise TypeError("loopy does not directly support object arrays " @@ -826,7 +828,7 @@ class ArrayBase(Record): return self.copy(**kwargs) - def vector_size(self): + def vector_size(self, target): """Return the size of the vector type used for the array divided by the basic data type. @@ -844,13 +846,13 @@ class ArrayBase(Record): "length for vector axis %d (0-based)" % ( self.name, i)) - vec_dtype = cl.array.vec.types[self.dtype, shape_i] + vec_dtype = target.vector_dtype(self.dtype, shape_i) return int(vec_dtype.itemsize) // int(self.dtype.itemsize) return 1 - def decl_info(self, is_written, index_dtype): + def decl_info(self, target, is_written, index_dtype): """Return a list of :class:`loopy.codegen.ImplementedDataInfo` instances corresponding to the argume """ @@ -907,6 +909,7 @@ class ArrayBase(Record): stride_args.append( ImplementedDataInfo( + target=target, name=stride_name, dtype=index_dtype, cgen_declarator=Const(POD(index_dtype, stride_name)), @@ -915,12 +918,13 @@ class ArrayBase(Record): full_name, stride_impl_axis))) yield ImplementedDataInfo( + target=target, name=full_name, base_name=self.name, # implemented by various argument types cgen_declarator=self.get_arg_decl( - name_suffix, shape, dtype, is_written), + target, name_suffix, shape, dtype, is_written), arg_class=type(self), dtype=dtype, @@ -935,6 +939,7 @@ class ArrayBase(Record): from cgen import Const, POD offset_name = full_name+"_offset" yield ImplementedDataInfo( + target=target, name=offset_name, dtype=index_dtype, cgen_declarator=Const(POD(index_dtype, offset_name)), @@ -1003,7 +1008,7 @@ class ArrayBase(Record): # vectors always have stride 1 unvec_strides + (1,), stride_arg_axes, - cl.array.vec.types[dtype, shape_i], + target.vector_dtype(dtype, shape_i), user_index + (None,)): yield res @@ -1067,7 +1072,7 @@ class AccessInfo(Record): """ -def get_access_info(ary, index, eval_expr): +def get_access_info(target, ary, index, eval_expr): """ :arg ary: an object of type :class:`ArrayBase` :arg index: a tuple of indices representing a subscript into ary @@ -1109,7 +1114,7 @@ def get_access_info(ary, index, eval_expr): vector_index = None subscripts = [0] * num_target_axes - vector_size = ary.vector_size() + vector_size = ary.vector_size(target) # {{{ process separate-array dim tags first, to find array name diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 0727000f9d1b5cda84359c0d7ac3e49e5694f028..426fd3c537ee7635d661980bd3655031387b3568 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1043,6 +1043,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): to silence :arg options: an instance of :class:`loopy.Options` or an equivalent string representation + :arg target: an instance of :class:`loopy.target.TargetBase`, or *None*, + to use an OpenCL target. """ defines = kwargs.pop("defines", {}) @@ -1051,6 +1053,17 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): silenced_warnings = kwargs.pop("silenced_warnings", []) options = kwargs.pop("options", None) flags = kwargs.pop("flags", None) + target = kwargs.pop("target", None) + + if target is None: + try: + import pyopencl # noqa + except ImportError: + from loopy.target.opencl import OpenCLTarget + target = OpenCLTarget() + else: + from loopy.target.pyopencl import PyOpenCLTarget + target = PyOpenCLTarget() if flags is not None: if options is not None: @@ -1153,6 +1166,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): temporary_variables=temporary_variables, silenced_warnings=silenced_warnings, options=options, + target=target, **kwargs) from loopy import duplicate_inames diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index a563b9ef39560dacfa77943e037d4a25b2213c15..eae4008b8b515ad4e82c3a140f53866d4c8de50e 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -175,10 +175,9 @@ def parse_tag(tag): # {{{ arguments - class KernelArgument(Record): def __init__(self, **kwargs): - dtype = kwargs.pop("dtype") + dtype = kwargs.pop("dtype", None) if isinstance(dtype, np.dtype): from loopy.tools import PicklableDtype @@ -210,13 +209,13 @@ class GlobalArg(ArrayBase, KernelArgument): min_target_axes = 0 max_target_axes = 1 - def get_arg_decl(self, name_suffix, shape, dtype, is_written): + def get_arg_decl(self, target, name_suffix, shape, dtype, is_written): from loopy.codegen import POD # uses the correct complex type from cgen import RestrictPointer, Const from cgen.opencl import CLGlobal arg_decl = RestrictPointer( - POD(dtype, self.name + name_suffix)) + POD(target, dtype, self.name + name_suffix)) if not is_written: arg_decl = Const(arg_decl) @@ -228,7 +227,7 @@ class ConstantArg(ArrayBase, KernelArgument): min_target_axes = 0 max_target_axes = 1 - def get_arg_decl(self, name_suffix, shape, dtype, is_written): + def get_arg_decl(self, target, name_suffix, shape, dtype, is_written): from loopy.codegen import POD # uses the correct complex type from cgen import RestrictPointer, Const from cgen.opencl import CLConstant @@ -250,7 +249,7 @@ class ImageArg(ArrayBase, KernelArgument): def dimensions(self): return len(self.dim_tags) - def get_arg_decl(self, name_suffix, shape, dtype, is_written): + def get_arg_decl(self, target, name_suffix, shape, dtype, is_written): if is_written: mode = "w" else: @@ -262,7 +261,8 @@ class ImageArg(ArrayBase, KernelArgument): class ValueArg(KernelArgument): def __init__(self, name, dtype=None, approximately=1000): - if dtype is not None: + from loopy.tools import PicklableDtype + if dtype is not None and not isinstance(dtype, PicklableDtype): dtype = np.dtype(dtype) KernelArgument.__init__(self, name=name, dtype=dtype, @@ -342,12 +342,12 @@ class TemporaryVariable(ArrayBase): from pytools import product return product(si for si in self.shape)*self.dtype.itemsize - def get_arg_decl(self, name_suffix, shape, dtype, is_written): + def get_arg_decl(self, target, name_suffix, shape, dtype, is_written): from cgen import ArrayOf from loopy.codegen import POD # uses the correct complex type from cgen.opencl import CLLocal - temp_var_decl = POD(self.dtype, self.name) + temp_var_decl = POD(target, self.dtype, self.name) # FIXME take into account storage_shape, or something like it storage_shape = self.shape diff --git a/loopy/library/function.py b/loopy/library/function.py index fcae07b6b599761e4081c40f1eaad54ca9bcb2c6..e494169bbe5b83df852ea3d483ed3640381891f6 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -23,64 +23,19 @@ THE SOFTWARE. """ -import numpy as np - - -def default_function_mangler(name, arg_dtypes): +def default_function_mangler(target, name, arg_dtypes): from loopy.library.reduction import reduction_function_mangler manglers = [reduction_function_mangler] for mangler in manglers: - result = mangler(name, arg_dtypes) + result = mangler(target, name, arg_dtypes) if result is not None: return result return None -def opencl_function_mangler(name, arg_dtypes): - if name in ["max", "min"] and len(arg_dtypes) == 2: - dtype = np.find_common_type([], arg_dtypes) - - if dtype.kind == "c": - raise RuntimeError("min/max do not support complex numbers") - - if dtype.kind == "f": - name = "f" + name - - return dtype, name - - if name in "atan2" and len(arg_dtypes) == 2: - return arg_dtypes[0], name - - if len(arg_dtypes) == 1: - arg_dtype, = arg_dtypes - - if arg_dtype.kind == "c": - if arg_dtype == np.complex64: - tpname = "cfloat" - elif arg_dtype == np.complex128: - tpname = "cdouble" - else: - raise RuntimeError("unexpected complex type '%s'" % arg_dtype) - - if name in ["sqrt", "exp", "log", - "sin", "cos", "tan", - "sinh", "cosh", "tanh", - "conj"]: - return arg_dtype, "%s_%s" % (tpname, name) - - if name in ["real", "imag", "abs"]: - return np.dtype(arg_dtype.type(0).real), "%s_%s" % (tpname, name) - - if name == "dot": - scalar_dtype, offset, field_name = arg_dtypes[0].fields["s0"] - return scalar_dtype, name - - return None - - -def single_arg_function_mangler(name, arg_dtypes): +def single_arg_function_mangler(target, name, arg_dtypes): if len(arg_dtypes) == 1: dtype, = arg_dtypes return dtype, name diff --git a/loopy/library/preamble.py b/loopy/library/preamble.py deleted file mode 100644 index 86c1fecec1019e9b0b5938464c27f3c07932211c..0000000000000000000000000000000000000000 --- a/loopy/library/preamble.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import division - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - - -import numpy as np - - -def default_preamble_generator(seen_dtypes, seen_functions): - from loopy.library.reduction import reduction_preamble_generator - - for result in reduction_preamble_generator(seen_dtypes, seen_functions): - yield result - - has_double = False - has_complex = False - - for dtype in seen_dtypes: - if dtype in [np.float64, np.complex128]: - has_double = True - if dtype.kind == "c": - has_complex = True - - if has_double: - yield ("00_enable_double", """ - #if __OPENCL_C_VERSION__ < 120 - #pragma OPENCL EXTENSION cl_khr_fp64: enable - #endif - """) - - if has_complex: - if has_double: - yield ("10_include_complex_header", """ - #define PYOPENCL_DEFINE_CDOUBLE - - #include <pyopencl-complex.h> - """) - else: - yield ("10_include_complex_header", """ - #include <pyopencl-complex.h> - """) - - c_funcs = set(func.c_name for func in seen_functions) - if "int_floor_div" in c_funcs: - yield ("05_int_floor_div", """ - #define int_floor_div(a,b) \ - (( (a) - \ - ( ( (a)<0 ) != ( (b)<0 )) \ - *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ - / (b) ) - """) - - if "int_floor_div_pos_b" in c_funcs: - yield ("05_int_floor_div_pos_b", """ - #define int_floor_div_pos_b(a,b) ( \ - ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ - ) - """) - -# vim: foldmethod=marker diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 650d1e45218ff21a8140288cb3e84762b98046ef..9a4330658515c0eccfc6566d1a478adb6afbecf2 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -27,6 +27,7 @@ from pymbolic import var import numpy as np from loopy.symbolic import FunctionIdentifier +from loopy.diagnostic import LoopyError class ReductionOperation(object): @@ -34,7 +35,7 @@ class ReductionOperation(object): equality-comparable. """ - def result_dtype(self, arg_dtype, inames): + def result_dtype(self, target, arg_dtype, inames): raise NotImplementedError def neutral_element(self, dtype, inames): @@ -54,32 +55,50 @@ class ReductionOperation(object): def __ne__(self, other): return not self.__eq__(other) + @staticmethod + def parse_result_type(target, op_type): + try: + return np.dtype(op_type) + except TypeError: + pass + + if op_type.startswith("vec_"): + try: + return target.get_or_register_dtype(op_type[4:]) + except AttributeError: + pass + + raise LoopyError("unable to parse reduction type: '%s'" + % op_type) + class ScalarReductionOperation(ReductionOperation): - def __init__(self, forced_result_dtype=None): + def __init__(self, forced_result_type=None): """ - :arg forced_result_dtype: Force the reduction result to be of this type. + :arg forced_result_type: Force the reduction result to be of this type. + May be a string identifying the type for the backend under + consideration. """ - self.forced_result_dtype = forced_result_dtype + self.forced_result_type = forced_result_type - def result_dtype(self, arg_dtype, inames): - if self.forced_result_dtype is not None: - return self.forced_result_dtype + def result_dtype(self, target, arg_dtype, inames): + if self.forced_result_type is not None: + return self.parse_result_type(target, self.forced_result_type) return arg_dtype def __hash__(self): - return hash((type(self), self.forced_result_dtype)) + return hash((type(self), self.forced_result_type)) def __eq__(self, other): return (type(self) == type(other) - and self.forced_result_dtype == other.forced_result_dtype) + and self.forced_result_type == other.forced_result_type) def __str__(self): result = type(self).__name__.replace("ReductionOperation", "").lower() - if self.forced_result_dtype is not None: - result = "%s<%s>" % (result, str(self.forced_result_dtype)) + if self.forced_result_type is not None: + result = "%s<%s>" % (result, str(self.forced_result_type)) return result @@ -136,15 +155,14 @@ class _ArgExtremumReductionOperation(ReductionOperation): def prefix(self, dtype): return "loopy_arg%s_%s" % (self.which, dtype.type.__name__) - def result_dtype(self, dtype, inames): + def result_dtype(self, target, dtype, inames): try: return ARGEXT_STRUCT_DTYPES[dtype] except KeyError: struct_dtype = np.dtype([("value", dtype), ("index", np.int32)]) ARGEXT_STRUCT_DTYPES[dtype] = struct_dtype - from pyopencl.tools import get_or_register_dtype - get_or_register_dtype(self.prefix(dtype)+"_result", struct_dtype) + target.get_or_register_dtype(self.prefix(dtype)+"_result", struct_dtype) return struct_dtype def neutral_element(self, dtype, inames): @@ -188,11 +206,10 @@ class ArgExtFunction(FunctionIdentifier): return (self.reduction_op, self.scalar_dtype, self.name, self.inames) -def get_argext_preamble(func_id): +def get_argext_preamble(target, func_id): op = func_id.reduction_op prefix = op.prefix(func_id.scalar_dtype) - from pyopencl.tools import dtype_to_ctype from pymbolic.mapper.c_code import CCodeMapper c_code_mapper = CCodeMapper() @@ -225,7 +242,7 @@ def get_argext_preamble(func_id): } """ % dict( type_name=prefix+"_result", - scalar_type=dtype_to_ctype(func_id.scalar_dtype), + scalar_type=target.dtype_to_typename(func_id.scalar_dtype), prefix=prefix, neutral=c_code_mapper( op.neutral_sign*get_le_neutral(func_id.scalar_dtype)), @@ -267,20 +284,8 @@ def parse_reduction_op(name): op_name = red_op_match.group(1) op_type = red_op_match.group(2) - try: - op_dtype = np.dtype(op_type) - except TypeError: - op_dtype = None - - if op_dtype is None and op_type.startswith("vec_"): - import pyopencl.array as cl_array - try: - op_dtype = getattr(cl_array.vec, op_type[4:]) - except AttributeError: - op_dtype = None - - if op_name in _REDUCTION_OPS and op_dtype is not None: - return _REDUCTION_OPS[op_name](op_dtype) + if op_name in _REDUCTION_OPS: + return _REDUCTION_OPS[op_name](op_type) if name in _REDUCTION_OPS: return _REDUCTION_OPS[name]() @@ -295,18 +300,27 @@ def parse_reduction_op(name): # }}} -def reduction_function_mangler(func_id, arg_dtypes): +def reduction_function_mangler(target, func_id, arg_dtypes): if isinstance(func_id, ArgExtFunction): + from loopy.target.opencl import OpenCLTarget + if not isinstance(target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + op = func_id.reduction_op - return (op.result_dtype(func_id.scalar_dtype, func_id.inames), + return (op.result_dtype(target, func_id.scalar_dtype, func_id.inames), "%s_%s" % (op.prefix(func_id.scalar_dtype), func_id.name)) return None -def reduction_preamble_generator(seen_dtypes, seen_functions): +def reduction_preamble_generator(target, seen_dtypes, seen_functions): + from loopy.target.opencl import OpenCLTarget + for func in seen_functions: if isinstance(func.name, ArgExtFunction): - yield get_argext_preamble(func.name) + if not isinstance(target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + yield get_argext_preamble(target, func.name) # vim: fdm=marker diff --git a/loopy/library/symbol.py b/loopy/library/symbol.py deleted file mode 100644 index 4ad22d58cebcff297a21509d43ba226eb4b1da8e..0000000000000000000000000000000000000000 --- a/loopy/library/symbol.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import division - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - - -import numpy as np - - -def opencl_symbol_mangler(name): - # FIXME: should be more picky about exact names - if name.startswith("FLT_"): - return np.dtype(np.float32), name - elif name.startswith("DBL_"): - return np.dtype(np.float64), name - elif name.startswith("M_"): - if name.endswith("_F"): - return np.dtype(np.float32), name - else: - return np.dtype(np.float64), name - elif name == "INFINITY": - return np.dtype(np.float32), name - else: - return None - -# vim: foldmethod=marker diff --git a/loopy/preprocess.py b/loopy/preprocess.py index d4275ea6a1114e9994e11fb00f008ff4ef584f4d..af279fc4e56d3e6106231db223750555714abed5 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -24,10 +24,7 @@ THE SOFTWARE. import six -from six.moves import range import numpy as np -import pyopencl as cl -import pyopencl.characterize as cl_char from loopy.diagnostic import ( LoopyError, LoopyWarning, WriteRaceConditionWarning, warn, LoopyAdvisory) @@ -40,6 +37,36 @@ import logging logger = logging.getLogger(__name__) +# {{{ propagate target info + +def propagate_target(kernel): + import loopy as lp + new_args = [] + + for arg in kernel.args: + dtype = arg.picklable_dtype + if dtype is not None and dtype is not lp.auto: + dtype = dtype.with_target(kernel.target) + + new_args.append(arg.copy(dtype=dtype)) + + new_temporary_variables = {} + for name, temp in six.iteritems(kernel.temporary_variables): + dtype = temp.picklable_dtype + if dtype is not None and dtype is not lp.auto: + dtype = dtype.with_target(kernel.target) + + new_temporary_variables[name] = temp.copy(dtype=dtype) + + kernel = kernel.copy( + args=new_args, + temporary_variables=new_temporary_variables) + + return kernel + +# }}} + + # {{{ infer types def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): @@ -402,7 +429,8 @@ def realize_reduction(kernel, insn_id_filter=None): new_temporary_variables[target_var_name] = TemporaryVariable( name=target_var_name, shape=(), - dtype=expr.operation.result_dtype(arg_dtype, expr.inames), + dtype=expr.operation.result_dtype( + kernel.target, arg_dtype, expr.inames), is_local=False) outer_insn_inames = temp_kernel.insn_inames(insn) @@ -992,90 +1020,16 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # }}} -# {{{ temp storage adjust for bank conflict - -def adjust_local_temp_var_storage(kernel, device): - logger.debug("%s: adjust temp var storage" % kernel.name) - - new_temp_vars = {} - - lmem_size = cl_char.usable_local_mem_size(device) - for temp_var in six.itervalues(kernel.temporary_variables): - if not temp_var.is_local: - new_temp_vars[temp_var.name] = \ - temp_var.copy(storage_shape=temp_var.shape) - continue - - other_loctemp_nbytes = [ - tv.nbytes - for tv in six.itervalues(kernel.temporary_variables) - if tv.is_local and tv.name != temp_var.name] - - storage_shape = temp_var.storage_shape - - if storage_shape is None: - storage_shape = temp_var.shape - - storage_shape = list(storage_shape) - - # sizes of all dims except the last one, which we may change - # below to avoid bank conflicts - from pytools import product - - if device.local_mem_type == cl.device_local_mem_type.GLOBAL: - # FIXME: could try to avoid cache associativity disasters - new_storage_shape = storage_shape - - elif device.local_mem_type == cl.device_local_mem_type.LOCAL: - min_mult = cl_char.local_memory_bank_count(device) - good_incr = None - new_storage_shape = storage_shape - min_why_not = None - - for increment in range(storage_shape[-1]//2): - - test_storage_shape = storage_shape[:] - test_storage_shape[-1] = test_storage_shape[-1] + increment - new_mult, why_not = cl_char.why_not_local_access_conflict_free( - device, temp_var.dtype.itemsize, - temp_var.shape, test_storage_shape) - - # will choose smallest increment 'automatically' - if new_mult < min_mult: - new_lmem_use = (sum(other_loctemp_nbytes) - + temp_var.dtype.itemsize*product(test_storage_shape)) - if new_lmem_use < lmem_size: - new_storage_shape = test_storage_shape - min_mult = new_mult - min_why_not = why_not - good_incr = increment - - if min_mult != 1: - from warnings import warn - from loopy.diagnostic import LoopyAdvisory - warn("could not find a conflict-free mem layout " - "for local variable '%s' " - "(currently: %dx conflict, increment: %s, reason: %s)" - % (temp_var.name, min_mult, good_incr, min_why_not), - LoopyAdvisory) - else: - from warnings import warn - warn("unknown type of local memory") - - new_storage_shape = storage_shape - - new_temp_vars[temp_var.name] = temp_var.copy(storage_shape=new_storage_shape) - - return kernel.copy(temporary_variables=new_temp_vars) - -# }}} - - preprocess_cache = PersistentDict("loopy-preprocess-cache-v2-"+DATA_MODEL_VERSION, key_builder=LoopyKeyBuilder()) def preprocess_kernel(kernel, device=None): + if device is not None: + from warnings import warn + warn("passing 'device' to preprocess_kernel() is deprecated", + DeprecationWarning, stacklevel=2) + from loopy.kernel import kernel_state if kernel.state != kernel_state.INITIAL: raise LoopyError("cannot re-preprocess an already preprocessed " @@ -1085,14 +1039,10 @@ def preprocess_kernel(kernel, device=None): from loopy import CACHING_ENABLED if CACHING_ENABLED: - if device is not None: - device_id = device.persistent_unique_id - else: - device_id = None + input_kernel = kernel - pp_cache_key = (kernel, device_id) try: - result = preprocess_cache[pp_cache_key] + result = preprocess_cache[kernel] logger.info("%s: preprocess cache hit" % kernel.name) return result except KeyError: @@ -1134,20 +1084,29 @@ def preprocess_kernel(kernel, device=None): kernel = find_boostability(kernel) kernel = limit_boostability(kernel) - if device is not None: - kernel = adjust_local_temp_var_storage(kernel, device) - else: - from loopy.diagnostic import warn - warn(kernel, "no_device_in_preprocess", - "no device parameter was passed to loopy.preprocess") + kernel = kernel.target.preprocess(kernel) logger.info("%s: preprocess done" % kernel.name) kernel = kernel.copy( state=kernel_state.PREPROCESSED) + # {{{ propagate target info + + # PicklableDtype instances for example need to know the target they're working + # towards in order to pickle and unpickle them. This is the first pass that + # uses caching, so we need to be ready to pickle. This means propagating + # this target information. + + if CACHING_ENABLED: + input_kernel = propagate_target(input_kernel) + + kernel = propagate_target(kernel) + + # }}} + if CACHING_ENABLED: - preprocess_cache[pp_cache_key] = kernel + preprocess_cache[input_kernel] = kernel return kernel diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 3b3eba49e8cec0d62fd2a57bcb19d795dcb40167..10acada145a3b65cd3232dcf06d0657672c5f347 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -243,10 +243,8 @@ class Reduction(AlgebraicLeaf): def __init__(self, operation, inames, expr): assert isinstance(inames, tuple) - - if isinstance(operation, str): - from loopy.library.reduction import parse_reduction_op - operation = parse_reduction_op(operation) + from loopy.library.reduction import ReductionOperation + assert isinstance(operation, ReductionOperation) self.operation = operation self.inames = inames @@ -620,7 +618,27 @@ class FunctionToPrimitiveMapper(IdentityMapper): turns those into the actual pymbolic primitives used for that. """ + def _parse_reduction(self, operation, inames, red_expr): + if isinstance(inames, Variable): + inames = (inames,) + + if not isinstance(inames, (tuple)): + raise TypeError("iname argument to reduce() must be a symbol " + "or a tuple of symbols") + + processed_inames = [] + for iname in inames: + if not isinstance(iname, Variable): + raise TypeError("iname argument to reduce() must be a symbol " + "or a tuple or a tuple of symbols") + + processed_inames.append(iname.name) + + return Reduction(operation, tuple(processed_inames), red_expr) + def map_call(self, expr): + from loopy.library.reduction import parse_reduction_op + from pymbolic.primitives import Variable if not isinstance(expr.function, Variable): return IdentityMapper.map_call(self, expr) @@ -644,51 +662,38 @@ class FunctionToPrimitiveMapper(IdentityMapper): elif name == "reduce": if len(expr.parameters) == 3: operation, inames, red_expr = expr.parameters + + if not isinstance(operation, Variable): + raise TypeError("operation argument to reduce() " + "must be a symbol") + + operation = parse_reduction_op(operation.name) + return self._parse_reduction(operation, inames, self.rec(red_expr)) else: raise TypeError("invalid 'reduce' calling sequence") elif name == "if": - if len(expr.parameters) in [2, 3]: + if len(expr.parameters) == 3: from pymbolic.primitives import If return If(*expr.parameters) else: - raise TypeError("if takes two or three arguments") + raise TypeError("if takes three arguments") else: # see if 'name' is an existing reduction op - from loopy.library.reduction import parse_reduction_op - if parse_reduction_op(name): + operation = parse_reduction_op(name) + if operation: if len(expr.parameters) != 2: raise RuntimeError("invalid invocation of " "reduction operation '%s'" % expr.function.name) - operation = expr.function inames, red_expr = expr.parameters + return self._parse_reduction(operation, inames, self.rec(red_expr)) + else: return IdentityMapper.map_call(self, expr) - red_expr = self.rec(red_expr) - - if not isinstance(operation, Variable): - raise TypeError("operation argument to reduce() must be a symbol") - operation = operation.name - if isinstance(inames, Variable): - inames = (inames,) - - if not isinstance(inames, (tuple)): - raise TypeError("iname argument to reduce() must be a symbol " - "or a tuple of symbols") - - processed_inames = [] - for iname in inames: - if not isinstance(iname, Variable): - raise TypeError("iname argument to reduce() must be a symbol " - "or a tuple or a tuple of symbols") - - processed_inames.append(iname.name) - - return Reduction(operation, tuple(processed_inames), red_expr) # {{{ customization to pymbolic parser diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d36c39044cb4448d59e5dae78f5cca13afb06d11 --- /dev/null +++ b/loopy/target/__init__.py @@ -0,0 +1,84 @@ +"""Base target interface.""" + +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2015 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +class TargetBase(object): + """Objects of this type must be picklable.""" + + # {{{ persistent hashing + + hash_fields = [] + comparison_fields = [] + + def update_persistent_hash(self, key_hash, key_builder): + for field_name in self.hash_fields: + key_builder.rec(key_hash, getattr(self, field_name)) + + def __eq__(self, other): + if type(self) != type(other): + return False + + for field_name in self.comparison_fields: + if getattr(self, field_name) != getattr(other, field_name): + return False + + return True + + def __ne__(self, other): + return not self.__eq__(other) + + # }}} + + def preprocess(self, kernel): + return kernel + + def pre_codegen_check(self, kernel): + pass + + def function_manglers(self): + return [] + + def symbol_manglers(self): + return [] + + def preamble_generators(self): + return [] + + def get_or_register_dtype(self, names, dtype=None): + raise NotImplementedError() + + def dtype_to_typename(self, dtype): + raise NotImplementedError() + + def is_vector_dtype(self, dtype): + raise NotImplementedError() + + def vector_dtype(self, base, count): + raise NotImplementedError() + + def alignment_requirement(self, type_decl): + import struct + return struct.calcsize(type_decl.struct_format()) diff --git a/loopy/target/opencl/__init__.py b/loopy/target/opencl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..04efdedabb6e281b99c8ffae3a5be53e084524ed --- /dev/null +++ b/loopy/target/opencl/__init__.py @@ -0,0 +1,274 @@ +"""OpenCL target independent of PyOpenCL.""" + +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2015 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np + +from loopy.target import TargetBase + + +# {{{ type registry + +def _register_types(): + from loopy.target.opencl.compyte.dtypes import ( + _fill_dtype_registry, get_or_register_dtype) + import struct + + _fill_dtype_registry(respect_windows=False, include_bool=False) + + # complex number support left out + + is_64_bit = struct.calcsize('@P') * 8 == 64 + if not is_64_bit: + get_or_register_dtype( + ["unsigned long", "unsigned long int"], np.uint64) + get_or_register_dtype( + ["signed long", "signed long int", "long int"], np.int64) + +_register_types() + +# }}} + + +# {{{ vector types + +class vec: + pass + + +def _create_vector_types(): + field_names = ["x", "y", "z", "w"] + + from loopy.target.opencl.compyte.dtypes import get_or_register_dtype + + vec.types = {} + vec.type_to_scalar_and_count = {} + + counts = [2, 3, 4, 8, 16] + + for base_name, base_type in [ + ('char', np.int8), + ('uchar', np.uint8), + ('short', np.int16), + ('ushort', np.uint16), + ('int', np.int32), + ('uint', np.uint32), + ('long', np.int64), + ('ulong', np.uint64), + ('float', np.float32), + ('double', np.float64), + ]: + for count in counts: + name = "%s%d" % (base_name, count) + + titles = field_names[:count] + + padded_count = count + if count == 3: + padded_count = 4 + + names = ["s%d" % i for i in range(count)] + while len(names) < padded_count: + names.append("padding%d" % (len(names)-count)) + + if len(titles) < len(names): + titles.extend((len(names)-len(titles))*[None]) + + try: + dtype = np.dtype(dict( + names=names, + formats=[base_type]*padded_count, + titles=titles)) + except NotImplementedError: + try: + dtype = np.dtype([((n, title), base_type) + for (n, title) in zip(names, titles)]) + except TypeError: + dtype = np.dtype([(n, base_type) for (n, title) + in zip(names, titles)]) + + get_or_register_dtype(name, dtype) + + setattr(vec, name, dtype) + + def create_array(dtype, count, padded_count, *args, **kwargs): + if len(args) < count: + from warnings import warn + warn("default values for make_xxx are deprecated;" + " instead specify all parameters or use" + " array.vec.zeros_xxx", DeprecationWarning) + padded_args = tuple(list(args)+[0]*(padded_count-len(args))) + array = eval("array(padded_args, dtype=dtype)", + dict(array=np.array, padded_args=padded_args, + dtype=dtype)) + for key, val in kwargs.items(): + array[key] = val + return array + + setattr(vec, "make_"+name, staticmethod(eval( + "lambda *args, **kwargs: create_array(dtype, %i, %i, " + "*args, **kwargs)" % (count, padded_count), + dict(create_array=create_array, dtype=dtype)))) + setattr(vec, "filled_"+name, staticmethod(eval( + "lambda val: vec.make_%s(*[val]*%i)" % (name, count)))) + setattr(vec, "zeros_"+name, + staticmethod(eval("lambda: vec.filled_%s(0)" % (name)))) + setattr(vec, "ones_"+name, + staticmethod(eval("lambda: vec.filled_%s(1)" % (name)))) + + vec.types[np.dtype(base_type), count] = dtype + vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count + +_create_vector_types() + +# }}} + + +# {{{ function mangler + +def opencl_function_mangler(target, name, arg_dtypes): + if not isinstance(name, str): + return None + + if name in ["max", "min"] and len(arg_dtypes) == 2: + dtype = np.find_common_type([], arg_dtypes) + + if dtype.kind == "c": + raise RuntimeError("min/max do not support complex numbers") + + if dtype.kind == "f": + name = "f" + name + + return dtype, name + + if name in "atan2" and len(arg_dtypes) == 2: + return arg_dtypes[0], name + + if name == "dot": + scalar_dtype, offset, field_name = arg_dtypes[0].fields["s0"] + return scalar_dtype, name + + return None + +# }}} + + +# {{{ symbol mangler + +def opencl_symbol_mangler(target, name): + # FIXME: should be more picky about exact names + if name.startswith("FLT_"): + return np.dtype(np.float32), name + elif name.startswith("DBL_"): + return np.dtype(np.float64), name + elif name.startswith("M_"): + if name.endswith("_F"): + return np.dtype(np.float32), name + else: + return np.dtype(np.float64), name + elif name == "INFINITY": + return np.dtype(np.float32), name + else: + return None + +# }}} + + +# {{{ preamble generator + +def opencl_preamble_generator(target, seen_dtypes, seen_functions): + has_double = False + + for dtype in seen_dtypes: + if dtype in [np.float64, np.complex128]: + has_double = True + + if has_double: + yield ("00_enable_double", """ + #if __OPENCL_C_VERSION__ < 120 + #pragma OPENCL EXTENSION cl_khr_fp64: enable + #endif + """) + + c_funcs = set(func.c_name for func in seen_functions) + if "int_floor_div" in c_funcs: + yield ("05_int_floor_div", """ + #define int_floor_div(a,b) \ + (( (a) - \ + ( ( (a)<0 ) != ( (b)<0 )) \ + *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ + / (b) ) + """) + + if "int_floor_div_pos_b" in c_funcs: + yield ("05_int_floor_div_pos_b", """ + #define int_floor_div_pos_b(a,b) ( \ + ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ + ) + """) + +# }}} + + +# {{{ target + +class OpenCLTarget(TargetBase): + def function_manglers(self): + return ( + super(OpenCLTarget, self).function_manglers() + [ + opencl_function_mangler + ]) + + def symbol_manglers(self): + return ( + super(OpenCLTarget, self).symbol_manglers() + [ + opencl_symbol_mangler + ]) + + def preamble_generators(self): + from loopy.library.reduction import reduction_preamble_generator + return ( + super(OpenCLTarget, self).preamble_generators() + [ + opencl_preamble_generator, + reduction_preamble_generator + ]) + + def get_or_register_dtype(self, names, dtype=None): + from loopy.target.opencl.compyte.dtypes import get_or_register_dtype + return get_or_register_dtype(names, dtype) + + def dtype_to_typename(self, dtype): + from loopy.target.opencl.compyte.dtypes import dtype_to_ctype + return dtype_to_ctype(dtype) + + def is_vector_dtype(self, dtype): + return list(vec.types.values()) + + def get_vector_dtype(self, base, count): + return vec.types[base, count] + +# }}} + +# vim: foldmethod=marker diff --git a/loopy/target/opencl/compyte b/loopy/target/opencl/compyte new file mode 160000 index 0000000000000000000000000000000000000000..5d54e1b2b7f28d3e779029ac0b4aa5f957829f23 --- /dev/null +++ b/loopy/target/opencl/compyte @@ -0,0 +1 @@ +Subproject commit 5d54e1b2b7f28d3e779029ac0b4aa5f957829f23 diff --git a/loopy/target/pyopencl/__init__.py b/loopy/target/pyopencl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0fff7643c5919d95c612a411d083a3388bcfd384 --- /dev/null +++ b/loopy/target/pyopencl/__init__.py @@ -0,0 +1,282 @@ +"""OpenCL target integrated with PyOpenCL.""" + +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2015 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +from six.moves import range + +import numpy as np + +from loopy.target.opencl import OpenCLTarget + +import pyopencl as cl +import pyopencl.characterize as cl_char + +import logging +logger = logging.getLogger(__name__) + + +# {{{ temp storage adjust for bank conflict + +def adjust_local_temp_var_storage(kernel, device): + logger.debug("%s: adjust temp var storage" % kernel.name) + + new_temp_vars = {} + + lmem_size = cl_char.usable_local_mem_size(device) + for temp_var in six.itervalues(kernel.temporary_variables): + if not temp_var.is_local: + new_temp_vars[temp_var.name] = \ + temp_var.copy(storage_shape=temp_var.shape) + continue + + other_loctemp_nbytes = [ + tv.nbytes + for tv in six.itervalues(kernel.temporary_variables) + if tv.is_local and tv.name != temp_var.name] + + storage_shape = temp_var.storage_shape + + if storage_shape is None: + storage_shape = temp_var.shape + + storage_shape = list(storage_shape) + + # sizes of all dims except the last one, which we may change + # below to avoid bank conflicts + from pytools import product + + if device.local_mem_type == cl.device_local_mem_type.GLOBAL: + # FIXME: could try to avoid cache associativity disasters + new_storage_shape = storage_shape + + elif device.local_mem_type == cl.device_local_mem_type.LOCAL: + min_mult = cl_char.local_memory_bank_count(device) + good_incr = None + new_storage_shape = storage_shape + min_why_not = None + + for increment in range(storage_shape[-1]//2): + + test_storage_shape = storage_shape[:] + test_storage_shape[-1] = test_storage_shape[-1] + increment + new_mult, why_not = cl_char.why_not_local_access_conflict_free( + device, temp_var.dtype.itemsize, + temp_var.shape, test_storage_shape) + + # will choose smallest increment 'automatically' + if new_mult < min_mult: + new_lmem_use = (sum(other_loctemp_nbytes) + + temp_var.dtype.itemsize*product(test_storage_shape)) + if new_lmem_use < lmem_size: + new_storage_shape = test_storage_shape + min_mult = new_mult + min_why_not = why_not + good_incr = increment + + if min_mult != 1: + from warnings import warn + from loopy.diagnostic import LoopyAdvisory + warn("could not find a conflict-free mem layout " + "for local variable '%s' " + "(currently: %dx conflict, increment: %s, reason: %s)" + % (temp_var.name, min_mult, good_incr, min_why_not), + LoopyAdvisory) + else: + from warnings import warn + warn("unknown type of local memory") + + new_storage_shape = storage_shape + + new_temp_vars[temp_var.name] = temp_var.copy(storage_shape=new_storage_shape) + + return kernel.copy(temporary_variables=new_temp_vars) + +# }}} + + +# {{{ check sizes against device properties + +def check_sizes(kernel, device): + import loopy as lp + + if device is None: + from loopy.diagnostic import warn + warn(kernel, "no_device_in_pre_codegen_checks", + "No device parameter was passed to the PyOpenCLTarget. " + "Perhaps you want to pass a device to benefit from " + "additional checking.") + return + + from loopy.diagnostic import LoopyAdvisory, LoopyError + + parameters = {} + for arg in kernel.args: + if isinstance(arg, lp.ValueArg) and arg.approximately is not None: + parameters[arg.name] = arg.approximately + + glens, llens = kernel.get_grid_sizes_as_exprs() + + if (max(len(glens), len(llens)) + > device.max_work_item_dimensions): + raise LoopyError("too many work item dimensions") + + from pymbolic import evaluate + from pymbolic.mapper.evaluator import UnknownVariableError + try: + glens = evaluate(glens, parameters) + llens = evaluate(llens, parameters) + except UnknownVariableError as name: + from warnings import warn + warn("could not check axis bounds because no value " + "for variable '%s' was passed to check_kernels()" + % name, LoopyAdvisory) + else: + for i in range(len(llens)): + if llens[i] > device.max_work_item_sizes[i]: + raise LoopyError("group axis %d too big" % i) + + from pytools import product + if product(llens) > device.max_work_group_size: + raise LoopyError("work group too big") + + from pyopencl.characterize import usable_local_mem_size + if kernel.local_mem_use() > usable_local_mem_size(device): + raise LoopyError("using too much local memory") + + from loopy.kernel.data import ConstantArg + const_arg_count = sum( + 1 for arg in kernel.args + if isinstance(arg, ConstantArg)) + + if const_arg_count > device.max_constant_args: + raise LoopyError("too many constant arguments") + +# }}} + + +def pyopencl_function_mangler(target, name, arg_dtypes): + if len(arg_dtypes) == 1 and isinstance(name, str): + arg_dtype, = arg_dtypes + + if arg_dtype.kind == "c": + if arg_dtype == np.complex64: + tpname = "cfloat" + elif arg_dtype == np.complex128: + tpname = "cdouble" + else: + raise RuntimeError("unexpected complex type '%s'" % arg_dtype) + + if name in ["sqrt", "exp", "log", + "sin", "cos", "tan", + "sinh", "cosh", "tanh", + "conj"]: + return arg_dtype, "%s_%s" % (tpname, name) + + if name in ["real", "imag", "abs"]: + return np.dtype(arg_dtype.type(0).real), "%s_%s" % (tpname, name) + + return None + + +# {{{ preamble generator + +def pyopencl_preamble_generator(target, seen_dtypes, seen_functions): + has_double = False + has_complex = False + + for dtype in seen_dtypes: + if dtype in [np.float64, np.complex128]: + has_double = True + if dtype.kind == "c": + has_complex = True + + if has_complex: + if has_double: + yield ("10_include_complex_header", """ + #define PYOPENCL_DEFINE_CDOUBLE + + #include <pyopencl-complex.h> + """) + else: + yield ("10_include_complex_header", """ + #include <pyopencl-complex.h> + """) + +# }}} + + +# {{{ + +class PyOpenCLTarget(OpenCLTarget): + def __init__(self, device=None): + super(PyOpenCLTarget, self).__init__() + + self.device = device + + hash_fields = ["device"] + comparison_fields = ["device"] + + def function_manglers(self): + return ( + super(PyOpenCLTarget, self).function_manglers() + [ + pyopencl_function_mangler + ]) + + def preamble_generators(self): + return ([ + pyopencl_preamble_generator + ] + super(PyOpenCLTarget, self).preamble_generators()) + + def preprocess(self, kernel): + return kernel + + def pre_codegen_check(self, kernel): + check_sizes(kernel, self.device) + + def get_or_register_dtype(self, names, dtype=None): + from pyopencl.compyte.dtypes import get_or_register_dtype + return get_or_register_dtype(names, dtype) + + def dtype_to_typename(self, dtype): + from pyopencl.compyte.dtypes import dtype_to_ctype + return dtype_to_ctype(dtype) + + def is_vector_dtype(self, dtype): + from pyopencl.array import vec + return dtype in list(vec.types.values()) + + def vector_dtype(self, base, count): + from pyopencl.array import vec + return vec.types[base, count] + + def alignment_requirement(self, type_decl): + import pyopencl._pvt_struct as _struct + return _struct.calcsize(type_decl.struct_format()) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/tools.py b/loopy/tools.py index 19fdcf2ce5f5f1c02125edb616d5342f0b8f5b54..29005cfbb6aca798d6b0f51c3dabf9a2b75e53a4 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -106,7 +106,7 @@ class PicklableDtype(object): The issues are the following - :class:`numpy.dtype` objects for custom types in :mod:`loopy` are usually - registered in the :mod:`pyopencl` dtype registry. This registration may + registered in the target's dtype registry. This registration may have been lost after unpickling. This container restores it implicitly, as part of unpickling. @@ -115,10 +115,13 @@ class PicklableDtype(object): by retrieving the 'canonical' type from the dtype registry. """ - def __init__(self, dtype): + def __init__(self, dtype, target=None): + assert not isinstance(dtype, PicklableDtype) + if dtype is None: raise TypeError("may not pass None to construct PicklableDtype") + self.target = target self.dtype = np.dtype(dtype) def __hash__(self): @@ -133,14 +136,25 @@ class PicklableDtype(object): return not self.__eq__(self, other) def __getstate__(self): - from pyopencl.compyte.dtypes import DTYPE_TO_NAME - c_name = DTYPE_TO_NAME[self.dtype] + if self.target is None: + raise RuntimeError("unable to pickle dtype: target not known") - return (c_name, self.dtype) + c_name = self.target.dtype_to_typename(self.dtype) + return (self.target, c_name, self.dtype) def __setstate__(self, state): - name, dtype = state - from pyopencl.tools import get_or_register_dtype - self.dtype = get_or_register_dtype([name], dtype) + target, name, dtype = state + self.target = target + self.dtype = self.target.get_or_register_dtype([name], dtype) + + def with_target(self, target): + if (self.target is not None + and target is not self.target): + raise RuntimeError("target already set to different value") + + return PicklableDtype(self.dtype, target) + + def assert_has_target(self): + assert self.target is not None # vim: foldmethod=marker diff --git a/loopy/version.py b/loopy/version.py index 8c58c4fe29bfdc35f31f9f2168edda7137e3a1bb..8f68a4958da821981a6922bd14335a2bc487473a 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -25,4 +25,4 @@ VERSION = (2014, 1) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS -DATA_MODEL_VERSION = "v2" +DATA_MODEL_VERSION = "v5" diff --git a/setup.py b/setup.py index d3d20fce6645c84b35f5f2da58fc3786fc576b70..b3cefdac0ad16fa7b4ffdf9969a47ac509f7a257 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,6 @@ setup(name="loo.py", install_requires=[ "pytools>=2014.2", - "pyopencl>=2014.1", "pymbolic>=2014.1.1", "cgen>=2013.1.2", "islpy>=2014.2",