diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 32b233900a9cd6ba491502185ccd33ac5d7544d4..367214148a1711fb3e55a752c7a8d23e3f7d3e5a 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -37,7 +37,8 @@ from pytools import UniqueNameGenerator, generate_unique_names from loopy.library.function import ( default_function_mangler, - single_arg_function_mangler) + single_arg_function_mangler, + default_function_identifiers) from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted @@ -143,6 +144,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): to instances of :class:`loopy.kernel.data.IndexTag`. .. attribute:: function_manglers + .. attribute:: function_identifiers .. attribute:: symbol_manglers .. attribute:: substitutions @@ -200,6 +202,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): default_function_mangler, single_arg_function_mangler, ], + function_identifiers=set(), symbol_manglers=[], iname_slab_increments={}, @@ -265,6 +268,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT + # Populating the function identifiers based on the target and the default + # function identifiers + function_identifiers = (default_function_identifiers() | + target.get_device_ast_builder().function_identifiers()) + ImmutableRecordWithoutPickling.__init__(self, domains=domains, instructions=instructions, @@ -284,6 +292,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): cache_manager=cache_manager, applied_iname_rewrites=applied_iname_rewrites, function_manglers=function_manglers, + function_identifiers=function_identifiers, symbol_manglers=symbol_manglers, index_dtype=index_dtype, options=options, diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 0daf327f441031662b46a4a83b4fc40e73eb5688..ee17bd1a747a5aa825e2e12f7d5ff5fc7068fff5 100644 --- 
a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -27,7 +27,7 @@ THE SOFTWARE. import numpy as np -from pymbolic.mapper import CSECachingMapperMixin +from pymbolic.mapper import CSECachingMapperMixin, Collector from loopy.tools import intern_frozenset_of_ids from loopy.symbolic import IdentityMapper, WalkMapper from loopy.kernel.data import ( @@ -1829,6 +1829,76 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): # }}} +# {{{ lookup functions + + +class FunctionScoper(IdentityMapper): + def __init__(self, function_ids): + self.function_ids = function_ids + + def map_call(self, expr): + if expr.function.name in self.function_ids: + # 1. need to change the function to ScopedFunction instead of Variable + from pymbolic.primitives import Call + from loopy.symbolic import ScopedFunction + + return super(FunctionScoper, self).map_call( + Call(function=ScopedFunction(expr.function.name), + parameters=expr.parameters)) + + else: + return super(FunctionScoper, self).map_call(expr) + + def map_call_with_kwargs(self, expr): + if expr.function.name in self.function_ids: + from pymbolic.primitives import CallWithKwargs + from loopy.symbolic import ScopedFunction + return super(FunctionScoper, self).map_call_with_kwargs( + CallWithKwargs(function=ScopedFunction(expr.function.name), + parameters=expr.parameters, + kw_parameters=expr.kw_parameters)) + else: + return super(FunctionScoper, self).map_call_with_kwargs(expr) + + +class ScopedFunctionCollector(Collector): + + def map_scoped_function(self, expr): + return set([expr.name]) + + +def scope_functions(kernel): + func_ids = kernel.function_identifiers.copy() + + from loopy.kernel.instruction import CInstruction, _DataObliviousInstruction + function_scoper = FunctionScoper(func_ids) + scoped_function_collector = ScopedFunctionCollector() + scoped_functions = set() + + new_insns = [] + + for insn in kernel.instructions: + if isinstance(insn, (MultiAssignmentBase, CInstruction)): + new_insn = 
insn.copy(expression=function_scoper(insn.expression)) + scoped_functions.update(scoped_function_collector(new_insn.expression)) + new_insns.append(new_insn) + elif isinstance(insn, _DataObliviousInstruction): + new_insns.append(insn) + else: + raise NotImplementedError("scope_function not implemented for %s" % + type(insn)) + + # Need to combine the scoped functions into a dict + """ + from loopy.function_interface import InKernelCallable + scoped_function_dict = ((func, InKernelCallable(func)) for func in + scoped_functions) + """ + return kernel.copy(instructions=new_insns) + +# }}} + + # {{{ kernel creation top-level def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): @@ -2163,6 +2233,11 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): check_for_duplicate_names(knl) check_written_variable_names(knl) + # Function Lookup + # TODO: here I add my function for function_lookup. Lol. realize the unintended + # pun + knl = scope_functions(knl) + from loopy.preprocess import prepare_for_caching knl = prepare_for_caching(knl) diff --git a/loopy/library/function.py b/loopy/library/function.py index 9d557ac9fe5c4c040608dc181b96daa812405a65..e8e1e22fae1b83c108d366d90f3e27199bec8682 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -23,7 +23,13 @@ THE SOFTWARE. 
""" +def default_function_identifiers(): + from loopy.library.reduction import reduction_function_identifiers + return set(["make_tuple"]) | reduction_function_identifiers() + + def default_function_mangler(kernel, name, arg_dtypes): + from loopy.library.reduction import reduction_function_mangler manglers = [reduction_function_mangler, tuple_function_mangler] @@ -55,5 +61,4 @@ def tuple_function_mangler(kernel, name, arg_dtypes): return None - # vim: foldmethod=marker diff --git a/loopy/library/random123.py b/loopy/library/random123.py index b8633114ddeb9d48eb33a765755302917ca27f63..82e44b2d1978a40515aa87c48a6e1a60eaec9dc1 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -62,12 +62,8 @@ RNG_VARIANTS = [ _threefry_base_info.copy(width=4, bits=64), ] -FUNC_NAMES_TO_RNG = dict( - (v.full_name + suffix, v) - for v in RNG_VARIANTS - for suffix in [ - "", "_f32", "_f64", - ]) +FUNC_NAMES_TO_RNG = set(v.full_name + suffix for v in RNG_VARIANTS for suffix in + ["", "_f32", "_f64", ]) # }}} @@ -180,43 +176,9 @@ def random123_preamble_generator(preamble_info): )) -def random123_function_mangler(kernel, name, arg_dtypes): - try: - rng_variant = FUNC_NAMES_TO_RNG[name] - except KeyError: - return None - - from loopy.types import NumpyType - target = kernel.target - base_dtype = {32: np.uint32, 64: np.uint64}[rng_variant.bits] - ctr_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.width) - key_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.key_width) - - from loopy.kernel.data import CallMangleInfo - fn = rng_variant.full_name - if name == fn: - return CallMangleInfo( - target_name=fn+"_gen", - result_dtypes=(ctr_dtype, ctr_dtype), - arg_dtypes=(ctr_dtype, key_dtype)) - - elif name == fn + "_f32": - return CallMangleInfo( - target_name=name, - result_dtypes=( - target.vector_dtype(NumpyType(np.float32), rng_variant.width), - ctr_dtype), - arg_dtypes=(ctr_dtype, key_dtype)) - - elif name == fn + "_f64": - return 
CallMangleInfo( - target_name=name, - result_dtypes=( - target.vector_dtype(NumpyType(np.float64), rng_variant.width), - ctr_dtype), - arg_dtypes=(ctr_dtype, key_dtype)) - - else: - return None +def random123_function_identifiers(): + return FUNC_NAMES_TO_RNG + +# Removed the random123_function_mangler # vim: foldmethod=marker diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 0e5a093b76b8d09d331edead7c69fcc2e3134601..5daa1528a7d67c0dc35644dc40c6d179dc01527e 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -422,6 +422,13 @@ def parse_reduction_op(name): # }}} +def reduction_function_identifiers(): + """ Return a :class:`set` of the type of the reduction identifiers that can be + encountered in a kernel. + """ + return set(op for op in _REDUCTION_OPS) + + def reduction_function_mangler(kernel, func_id, arg_dtypes): if isinstance(func_id, ArgExtOp): from loopy.target.opencl import CTarget diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 0cc8f4ba6a1531d748bd90492f570dbb563d962d..16c9fd4822a091b8986819da6dd5c8facdb05026 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -112,6 +112,8 @@ class IdentityMapperMixin(object): map_rule_argument = map_group_hw_index + map_scoped_function = IdentityMapperBase.map_variable + class IdentityMapper(IdentityMapperBase, IdentityMapperMixin): pass @@ -125,6 +127,8 @@ class PartialEvaluationMapper( def map_common_subexpression_uncached(self, expr): return type(expr)(self.rec(expr.child), expr.prefix, expr.scope) + map_scoped_function = map_variable + class WalkMapper(WalkMapperBase): def map_literal(self, expr, *args): @@ -163,6 +167,8 @@ class WalkMapper(WalkMapperBase): map_rule_argument = map_group_hw_index + map_scoped_function = WalkMapperBase.map_variable + class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant @@ -174,6 +180,8 @@ class CombineMapper(CombineMapperBase): map_linear_subscript = 
CombineMapperBase.map_subscript + map_scoped_function = CombineMapperBase.map_variable + class SubstitutionMapper( CSECachingMapperMixin, SubstitutionMapperBase, IdentityMapperMixin): @@ -230,6 +238,9 @@ class StringifyMapper(StringifyMapperBase): from pymbolic.mapper.stringifier import PREC_NONE return "cast(%s, %s)" % (repr(expr.type), self.rec(expr.child, PREC_NONE)) + def map_scoped_function(self, expr, prec): + return "ScopedFunction('%s')" % expr.name + class UnidirectionalUnifier(UnidirectionalUnifierBase): def map_reduction(self, expr, other, unis): @@ -287,6 +298,8 @@ class DependencyMapper(DependencyMapperBase): def map_type_cast(self, expr): return self.rec(expr.child) + map_scoped_function = DependencyMapperBase.map_variable + class SubstitutionRuleExpander(IdentityMapper): def __init__(self, rules): @@ -322,6 +335,8 @@ class SubstitutionRuleExpander(IdentityMapper): return self.rec(expr) + map_scoped_function = map_variable + # }}} @@ -636,6 +651,15 @@ class RuleArgument(p.Expression): mapper_method = intern("map_rule_argument") + +class ScopedFunction(p.Variable): + """ Connects a call to a callable available in a kernel. 
+ """ + mapper_method = intern("map_scoped_function") + + def stringifier(self): + return StringifyMapper + # }}} diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index a08b406f53798b4f7f6852a4f424182a75b224e4..fe6daf12cc06fde0c669d0a5e55e0a3ec62bee9f 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -150,6 +150,9 @@ class ASTBuilderBase(object): # {{{ library + def function_identifiers(self): + return set() + def function_manglers(self): return [] diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 8e69793e8079864a7e4c3117f267a20d6db3962f..2b5e394bbcc566510c27b069506b67f60d5cd911 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -356,6 +356,11 @@ def c_symbol_mangler(kernel, name): # {{{ function mangler +def c_math_identifiers(): + return set(["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", "tanh", + "exp", "log", "log10", "sqrt", "ceil", "floor", "max", "min"]) + + def c_math_mangler(target, name, arg_dtypes, modify_name=True): # Function mangler for math functions defined in C standard # Convert abs, min, max to fabs, fmin, fmax. 
@@ -427,6 +432,11 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): class CASTBuilder(ASTBuilderBase): # {{{ library + def function_identifiers(self): + return ( + super(CASTBuilder, self).function_identifiers() | + c_math_identifiers()) + def function_manglers(self): return ( super(CASTBuilder, self).function_manglers() + [ diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 31e0569b92a9ddada8ad66c2e0a065c191cc61d3..94870907b147e6fab07fb3260ad70ecc3249316f 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -31,11 +31,10 @@ from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from pytools import memoize_method from loopy.diagnostic import LoopyError from loopy.types import NumpyType -from loopy.target.c import DTypeRegistryWrapper, c_math_mangler +from loopy.target.c import DTypeRegistryWrapper, c_math_identifiers from loopy.kernel.data import temp_var_scope, CallMangleInfo from pymbolic import var -from functools import partial # {{{ dtype registry wrappers @@ -139,8 +138,27 @@ def _register_vector_types(dtype_registry): # }}} +# {{{ function identifiers + +_CL_SIMPLE_MULTI_ARG_FUNC_IDS = set(["clamp", "atan2"]) + + +VECTOR_LITERAL_FUNC_IDS = set("make_%s%d" % (name, count) + for name in ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', + 'ulong', 'float', 'double'] + for count in [2, 3, 4, 8, 16] + ) + + +def opencl_function_identifiers(): + return set(["max", "min", "dot"]) | (_CL_SIMPLE_MULTI_ARG_FUNC_IDS | + VECTOR_LITERAL_FUNC_IDS) + +# }}} + # {{{ function mangler + _CL_SIMPLE_MULTI_ARG_FUNCTIONS = { "clamp": 3, "atan2": 2, @@ -356,8 +374,6 @@ class OpenCLTarget(CTarget): vec.types[base.numpy_dtype, count], target=self) - # }}} - # }}} @@ -366,13 +382,9 @@ class OpenCLTarget(CTarget): class OpenCLCASTBuilder(CASTBuilder): # {{{ library - def function_manglers(self): - return ( - [ - opencl_function_mangler, - partial(c_math_mangler, modify_name=False) - ] + - 
super(OpenCLCASTBuilder, self).function_manglers()) + def function_identifiers(self): + return (opencl_function_identifiers() | c_math_identifiers() | + super(OpenCLCASTBuilder, self).function_identifiers()) def symbol_manglers(self): return ( diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 744c03d8ed091bc0f05e4fc41aa14e88ec89276a..1451cf9e7f0076484c648dfd67e7587371492b64 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -199,6 +199,11 @@ def check_sizes(kernel, device): # }}} +def pyopencl_function_identifiers(): + return set(["sqrt", "exp", "log", "sin", "cos", "tan", "sinh", "cosh", "tanh", + "conj", "real", "imag", "abs"]) + + def pyopencl_function_mangler(target, name, arg_dtypes): if len(arg_dtypes) == 1 and isinstance(name, str): arg_dtype, = arg_dtypes @@ -739,6 +744,11 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): # {{{ library + def function_identifiers(self): + from loopy.library.random123 import random123_function_identifiers + return (super(PyOpenCLCASTBuilder, self).function_identifiers() | + pyopencl_function_identifiers() | random123_function_identifiers()) + def function_manglers(self): from loopy.library.random123 import random123_function_mangler return (