From e3bb8f37ddffec0e9c998c2950edb330a54df478 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Jul 2017 16:45:31 -0500 Subject: [PATCH 01/18] Add a make_tuple() function to loopy. This function does trivial things, but it's there to solve the problem that the reduction neutral element getters are not allowed to store dtypes (#80). The function mangler demands that a function knows its type based on its arguments. For the neutral element getters, this is impossible because they take zero arguments. The simplest fix I can think of is to change a call to neutral_element() to a call to make_tuple(). Currently, the tuple code doesn't work yet due to pickling issues. I think the root cause is somewhere in __hackily_ensure_multi_argument_functions_are_scoped_private(). --- loopy/library/function.py | 3 ++- loopy/type_inference.py | 2 +- test/test_target.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/loopy/library/function.py b/loopy/library/function.py index efa590371..f3d14516c 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -25,8 +25,9 @@ THE SOFTWARE. def default_function_mangler(kernel, name, arg_dtypes): from loopy.library.reduction import reduction_function_mangler + from loopy.library.tuple import tuple_function_mangler - manglers = [reduction_function_mangler] + manglers = [reduction_function_mangler, tuple_function_mangler] for mangler in manglers: result = mangler(kernel, name, arg_dtypes) if result is not None: diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 78d817ce7..3fb165ead 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -442,7 +442,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): result_i = comp_dtype_set break - assert found + assert found, var_name if result_i is not None: result.append(result_i) diff --git a/test/test_target.py b/test/test_target.py index b656383e7..4b09829e1 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -176,6 +176,19 @@ def test_random123(ctx_factory, tp): assert (0 <= out).all() +def test_tuple(): + knl = lp.make_kernel( + "{ [i]: 0 <= i < 10 }", + """ + a, b = make_tuple(1, 2) + """) + + print( + lp.generate_code( + lp.get_one_scheduled_kernel( + lp.preprocess_kernel(knl)))[0]) + + def test_clamp(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) -- GitLab From b25c5bc238729de8ee4fb0ac258d5f28590d3e15 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Jul 2017 16:54:50 -0500 Subject: [PATCH 02/18] [ci skip] Add missing file. --- loopy/library/tuple.py | 69 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 loopy/library/tuple.py diff --git a/loopy/library/tuple.py b/loopy/library/tuple.py new file mode 100644 index 000000000..e60d24d70 --- /dev/null +++ b/loopy/library/tuple.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import, division, print_function + +__copyright__ = "Copyright (C) 2017 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from loopy.diagnostic import LoopyError + + +def tuple_function_mangler(kernel, name, arg_dtypes): + if name == "make_tuple": + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name=tuple_function_name(*arg_dtypes), + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + return None + + +def tuple_function_name(dtype0, dtype1): + return "loopy_tuple_%s_%s" % ( + dtype0.numpy_dtype.type.__name__, dtype1.numpy_dtype.type.__name__) + + +def get_tuple_preamble(kernel, func_id, arg_dtypes): + print("arg dtypes are", arg_dtypes) + name = tuple_function_name(*arg_dtypes) + return (name, """ + inline %(t0)s %(name)s(%(t0)s i0, %(t1)s i1, %(t1)s *o1) + { + *o1 = i1; + return i0; + } + """ % dict(name=name, + t0=kernel.target.dtype_to_typename(arg_dtypes[0]), + t1=kernel.target.dtype_to_typename(arg_dtypes[1]))) + + +def tuple_preamble_generator(preamble_info): + from loopy.target.opencl import OpenCLTarget + + for func in preamble_info.seen_functions: + if func.name == "make_tuple": + if not isinstance(preamble_info.kernel.target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + yield get_tuple_preamble(preamble_info.kernel, func.name, + func.arg_dtypes) + +# vim: fdm=marker -- GitLab From 4dc48bde8aef3d5d95637c0ee954a7f87c68aec4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Jul 2017 16:56:19 -0500 Subject: [PATCH 03/18] Remove print statement. --- loopy/library/tuple.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loopy/library/tuple.py b/loopy/library/tuple.py index e60d24d70..dd6b553eb 100644 --- a/loopy/library/tuple.py +++ b/loopy/library/tuple.py @@ -42,7 +42,6 @@ def tuple_function_name(dtype0, dtype1): def get_tuple_preamble(kernel, func_id, arg_dtypes): - print("arg dtypes are", arg_dtypes) name = tuple_function_name(*arg_dtypes) return (name, """ inline %(t0)s %(name)s(%(t0)s i0, %(t1)s i1, %(t1)s *o1) -- GitLab From 5201ec1f5a6c326e77d5346dbd0fc006a8cab7ae Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 9 Jul 2017 15:43:35 -0500 Subject: [PATCH 04/18] Make the tuple generation work. --- loopy/preprocess.py | 8 +++++++- loopy/target/opencl.py | 5 ++++- test/test_target.py | 17 ++++++++++------- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index c331ccc82..30968630f 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -331,6 +331,9 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): # }}} + from loopy.type_inference import TypeInferenceMapper + type_inf_mapper = TypeInferenceMapper(kernel) + from loopy.kernel.instruction import CallInstruction for insn in kernel.instructions: if not isinstance(insn, CallInstruction): @@ -352,6 +355,9 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): FIRST_POINTER_ASSIGNEE_IDX = 1 # noqa + assignee_dtypes, = type_inf_mapper( + insn.expression, return_tuple=True, return_dtype_set=True) + for assignee_nr, assignee_var_name, assignee in zip( range(FIRST_POINTER_ASSIGNEE_IDX, len(assignees)), assignee_var_names[FIRST_POINTER_ASSIGNEE_IDX:], @@ -383,7 +389,7 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_temporaries[new_assignee_name] = ( TemporaryVariable( name=new_assignee_name, - dtype=lp.auto, + dtype=assignee_dtypes[assignee_nr], scope=temp_var_scope.PRIVATE)) from pymbolic import var diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 01e56405e..e70acfeab 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -390,10 +390,13 @@ class OpenCLCASTBuilder(CASTBuilder): def preamble_generators(self): from loopy.library.reduction import reduction_preamble_generator + from loopy.library.tuple import tuple_preamble_generator + return ( super(OpenCLCASTBuilder, self).preamble_generators() + [ opencl_preamble_generator, - reduction_preamble_generator + reduction_preamble_generator, + tuple_preamble_generator ]) # }}} diff --git a/test/test_target.py b/test/test_target.py index 4b09829e1..2c6119552 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -176,17 +176,20 @@ def test_random123(ctx_factory, tp): assert (0 <= out).all() -def test_tuple(): +def test_tuple(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + knl = lp.make_kernel( - "{ [i]: 0 <= i < 10 }", + "{ [i]: 0 = i }", """ - a, b = make_tuple(1, 2) + a, b = make_tuple(1, 2.) """) - print( - lp.generate_code( - lp.get_one_scheduled_kernel( - lp.preprocess_kernel(knl)))[0]) + evt, (a,b) = knl(queue) + + assert a.get() == 1 + assert b.get() == 2. def test_clamp(ctx_factory): -- GitLab From c4891c7157be269d83b5963cb5bcceee0b0e3866 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 9 Jul 2017 19:43:27 -0500 Subject: [PATCH 05/18] flake8 fixes --- loopy/preprocess.py | 1 - test/test_target.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 30968630f..38499cb91 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -385,7 +385,6 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): newly_added_assignments_ids.add(new_assignment_id) - import loopy as lp new_temporaries[new_assignee_name] = ( TemporaryVariable( name=new_assignee_name, diff --git a/test/test_target.py b/test/test_target.py index 2c6119552..ad0cb7439 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -186,7 +186,7 @@ def test_tuple(ctx_factory): a, b = make_tuple(1, 2.) """) - evt, (a,b) = knl(queue) + evt, (a, b) = knl(queue) assert a.get() == 1 assert b.get() == 2. -- GitLab From c6898ffa48da9ef24acdc65570e44c9aa95de707 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 9 Jul 2017 20:24:42 -0500 Subject: [PATCH 06/18] Fix argmin and segmented reductions. --- loopy/library/reduction.py | 213 +++++++++++++------------------------ test/test_loopy.py | 41 +++++++ 2 files changed, 115 insertions(+), 139 deletions(-) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index f9648bde7..962b31681 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -123,7 +123,7 @@ class ScalarReductionOperation(ReductionOperation): class SumReductionOperation(ScalarReductionOperation): def neutral_element(self, dtype): - return 0 + return dtype.numpy_dtype.type(0) def __call__(self, dtype, operand1, operand2): return operand1 + operand2 @@ -131,7 +131,7 @@ class SumReductionOperation(ScalarReductionOperation): class ProductReductionOperation(ScalarReductionOperation): def neutral_element(self, dtype): - return 1 + return dtype.numpy_dtype.type(1) def __call__(self, dtype, operand1, operand2): return operand1 * operand2 @@ -189,8 +189,26 @@ class MinReductionOperation(ScalarReductionOperation): return var("min")(operand1, operand2) +# {{{ base class for symbolic reduction ops + +class ReductionOpFunction(FunctionIdentifier): + init_arg_names = ("reduction_op",) + + def __init__(self, reduction_op): + self.reduction_op = reduction_op + + def __getinitargs__(self): + return (self.reduction_op,) + +# }}} + + # {{{ segmented reduction +class SegmentedOp(ReductionOpFunction): + pass + + class _SegmentedScalarReductionOperation(ReductionOperation): def __init__(self, **kwargs): self.inner_reduction = self.base_reduction_class(**kwargs) @@ -205,7 +223,9 @@ class _SegmentedScalarReductionOperation(ReductionOperation): segment_flag_dtype.numpy_dtype.type.__name__) def neutral_element(self, scalar_dtype, segment_flag_dtype): - return SegmentedFunction(self, (scalar_dtype, segment_flag_dtype), "init")() + scalar_neutral_element = self.inner_reduction.neutral_element(scalar_dtype) + return var("make_tuple")(scalar_neutral_element, + segment_flag_dtype.numpy_dtype.type(0)) def result_dtypes(self, kernel, scalar_dtype, segment_flag_dtype): return (self.inner_reduction.result_dtypes(kernel, scalar_dtype) @@ -221,7 +241,7 @@ class _SegmentedScalarReductionOperation(ReductionOperation): return type(self) == type(other) def __call__(self, dtypes, operand1, operand2): - return SegmentedFunction(self, dtypes, "update")(*(operand1 + operand2)) + return SegmentedOp(self)(*(operand1 + operand2)) class SegmentedSumReductionOperation(_SegmentedScalarReductionOperation): @@ -236,45 +256,13 @@ class SegmentedProductReductionOperation(_SegmentedScalarReductionOperation): which = "product" -class SegmentedFunction(FunctionIdentifier): - init_arg_names = ("reduction_op", "dtypes", "name") - - def __init__(self, reduction_op, dtypes, name): - """ - :arg dtypes: A :class:`tuple` of `(scalar_dtype, segment_flag_dtype)` - """ - self.reduction_op = reduction_op - self.dtypes = dtypes - self.name = name - - @property - def scalar_dtype(self): - return self.dtypes[0] - - @property - def segment_flag_dtype(self): - return self.dtypes[1] - - def __getinitargs__(self): - return (self.reduction_op, self.dtypes, self.name) - - -def get_segmented_function_preamble(kernel, func_id): +def get_segmented_function_preamble(kernel, func_id, arg_dtypes): op = func_id.reduction_op - prefix = op.prefix(func_id.scalar_dtype, func_id.segment_flag_dtype) - - from pymbolic.mapper.c_code import CCodeMapper - - c_code_mapper = CCodeMapper() + scalar_dtype, segment_flag_dtype = arg_dtypes + prefix = op.prefix(scalar_dtype, segment_flag_dtype) return (prefix, """ - inline %(scalar_t)s %(prefix)s_init(%(segment_flag_t)s *segment_flag_out) - { - *segment_flag_out = 0; - return %(neutral)s; - } - - inline %(scalar_t)s %(prefix)s_update( + inline %(scalar_t)s %(prefix)s_op( %(scalar_t)s op1, %(segment_flag_t)s segment_flag1, %(scalar_t)s op2, %(segment_flag_t)s segment_flag2, %(segment_flag_t)s *segment_flag_out) @@ -283,32 +271,36 @@ def get_segmented_function_preamble(kernel, func_id): return segment_flag2 ? op2 : %(combined)s; } """ % dict( - scalar_t=kernel.target.dtype_to_typename(func_id.scalar_dtype), + scalar_t=kernel.target.dtype_to_typename(scalar_dtype), prefix=prefix, - segment_flag_t=kernel.target.dtype_to_typename( - func_id.segment_flag_dtype), - neutral=c_code_mapper( - op.inner_reduction.neutral_element(func_id.scalar_dtype)), + segment_flag_t=kernel.target.dtype_to_typename(segment_flag_dtype), combined=op.op % ("op1", "op2"), )) - # }}} # {{{ argmin/argmax +class ArgExtOp(ReductionOpFunction): + pass + + class _ArgExtremumReductionOperation(ReductionOperation): def prefix(self, scalar_dtype, index_dtype): return "loopy_arg%s_%s_%s" % (self.which, - index_dtype.numpy_dtype.type.__name__, - scalar_dtype.numpy_dtype.type.__name__) + scalar_dtype.numpy_dtype.type.__name__, + index_dtype.numpy_dtype.type.__name__) def result_dtypes(self, kernel, scalar_dtype, index_dtype): return (scalar_dtype, index_dtype) def neutral_element(self, scalar_dtype, index_dtype): - return ArgExtFunction(self, (scalar_dtype, index_dtype), "init")() + scalar_neutral_func = ( + get_ge_neutral if self.neutral_sign < 0 else get_le_neutral) + scalar_neutral_element = scalar_neutral_func(scalar_dtype) + return var("make_tuple")(scalar_neutral_element, + index_dtype.numpy_dtype.type(-1)) def __str__(self): return self.which @@ -324,7 +316,7 @@ class _ArgExtremumReductionOperation(ReductionOperation): return 2 def __call__(self, dtypes, operand1, operand2): - return ArgExtFunction(self, dtypes, "update")(*(operand1 + operand2)) + return ArgExtOp(self)(*(operand1 + operand2)) class ArgMaxReductionOperation(_ArgExtremumReductionOperation): @@ -339,44 +331,15 @@ class ArgMinReductionOperation(_ArgExtremumReductionOperation): neutral_sign = +1 -class ArgExtFunction(FunctionIdentifier): - init_arg_names = ("reduction_op", "dtypes", "name") - - def __init__(self, reduction_op, dtypes, name): - self.reduction_op = reduction_op - self.dtypes = dtypes - self.name = name - - @property - def scalar_dtype(self): - return self.dtypes[0] - - @property - def index_dtype(self): - return self.dtypes[1] - - def __getinitargs__(self): - return (self.reduction_op, self.dtypes, self.name) - - -def get_argext_preamble(kernel, func_id): +def get_argext_preamble(kernel, func_id, arg_dtypes): op = func_id.reduction_op - prefix = op.prefix(func_id.scalar_dtype, func_id.index_dtype) - - from pymbolic.mapper.c_code import CCodeMapper + scalar_dtype = arg_dtypes[0] + index_dtype = arg_dtypes[1] - c_code_mapper = CCodeMapper() - - neutral = get_ge_neutral if op.neutral_sign < 0 else get_le_neutral + prefix = op.prefix(scalar_dtype, index_dtype) return (prefix, """ - inline %(scalar_t)s %(prefix)s_init(%(index_t)s *index_out) - { - *index_out = INT_MIN; - return %(neutral)s; - } - - inline %(scalar_t)s %(prefix)s_update( + inline %(scalar_t)s %(prefix)s_op( %(scalar_t)s op1, %(index_t)s index1, %(scalar_t)s op2, %(index_t)s index2, %(index_t)s *index_out) @@ -393,10 +356,9 @@ def get_argext_preamble(kernel, func_id): } } """ % dict( - scalar_t=kernel.target.dtype_to_typename(func_id.scalar_dtype), + scalar_t=kernel.target.dtype_to_typename(scalar_dtype), prefix=prefix, - index_t=kernel.target.dtype_to_typename(func_id.index_dtype), - neutral=c_code_mapper(neutral(func_id.scalar_dtype)), + index_t=kernel.target.dtype_to_typename(index_dtype), comp=op.update_comparison, )) @@ -454,76 +416,47 @@ def parse_reduction_op(name): def reduction_function_mangler(kernel, func_id, arg_dtypes): - if isinstance(func_id, ArgExtFunction) and func_id.name == "init": + if isinstance(func_id, ArgExtOp): from loopy.target.opencl import CTarget if not isinstance(kernel.target, CTarget): raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op + scalar_dtype = arg_dtypes[0] + index_dtype = arg_dtypes[1] from loopy.kernel.data import CallMangleInfo return CallMangleInfo( - target_name="%s_init" % op.prefix( - func_id.scalar_dtype, func_id.index_dtype), + target_name="%s_op" % op.prefix( + scalar_dtype, index_dtype), result_dtypes=op.result_dtypes( - kernel, func_id.scalar_dtype, func_id.index_dtype), - arg_dtypes=(), - ) - - elif isinstance(func_id, ArgExtFunction) and func_id.name == "update": - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): - raise LoopyError("%s: only C-like targets supported for now" % func_id) - - op = func_id.reduction_op - - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="%s_update" % op.prefix( - func_id.scalar_dtype, func_id.index_dtype), - result_dtypes=op.result_dtypes( - kernel, func_id.scalar_dtype, func_id.index_dtype), + kernel, scalar_dtype, index_dtype), arg_dtypes=( - func_id.scalar_dtype, - kernel.index_dtype, - func_id.scalar_dtype, - kernel.index_dtype), - ) - - elif isinstance(func_id, SegmentedFunction) and func_id.name == "init": - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): - raise LoopyError("%s: only C-like targets supported for now" % func_id) - - op = func_id.reduction_op - - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="%s_init" % op.prefix( - func_id.scalar_dtype, func_id.segment_flag_dtype), - result_dtypes=op.result_dtypes( - kernel, func_id.scalar_dtype, func_id.segment_flag_dtype), - arg_dtypes=(), + scalar_dtype, + index_dtype, + scalar_dtype, + index_dtype), ) - elif isinstance(func_id, SegmentedFunction) and func_id.name == "update": + elif isinstance(func_id, SegmentedOp): from loopy.target.opencl import CTarget if not isinstance(kernel.target, CTarget): raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op + scalar_dtype, segment_flag_dtype = arg_dtypes from loopy.kernel.data import CallMangleInfo return CallMangleInfo( - target_name="%s_update" % op.prefix( - func_id.scalar_dtype, func_id.segment_flag_dtype), + target_name="%s_op" % op.prefix( + scalar_dtype, segment_flag_dtype), result_dtypes=op.result_dtypes( - kernel, func_id.scalar_dtype, func_id.segment_flag_dtype), + kernel, scalar_dtype, segment_flag_dtype), arg_dtypes=( - func_id.scalar_dtype, - func_id.segment_flag_dtype, - func_id.scalar_dtype, - func_id.segment_flag_dtype), + scalar_dtype, + segment_flag_dtype, + scalar_dtype, + segment_flag_dtype), ) return None @@ -533,16 +466,18 @@ def reduction_preamble_generator(preamble_info): from loopy.target.opencl import OpenCLTarget for func in preamble_info.seen_functions: - if isinstance(func.name, ArgExtFunction): + if isinstance(func.name, ArgExtOp): if not isinstance(preamble_info.kernel.target, OpenCLTarget): raise LoopyError("only OpenCL supported for now") - yield get_argext_preamble(preamble_info.kernel, func.name) + yield get_argext_preamble(preamble_info.kernel, func.name, + func.arg_dtypes) - elif isinstance(func.name, SegmentedFunction): + elif isinstance(func.name, SegmentedOp): if not isinstance(preamble_info.kernel.target, OpenCLTarget): raise LoopyError("only OpenCL supported for now") - yield get_segmented_function_preamble(preamble_info.kernel, func.name) + yield get_segmented_function_preamble(preamble_info.kernel, func.name, + func.arg_dtypes) # vim: fdm=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 21db62610..ad5fd72b6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2335,6 +2335,47 @@ def test_kernel_var_name_generator(): assert vng("b") != "b" +def test_complex_argmin(ctx_factory): + cl_ctx = ctx_factory() + knl = lp.make_kernel( + "{[ictr,itgt,idim]: " + "0<=itgt dist_sq = sum(idim, + (tgt[idim,itgt] - center[idim,ictr])**2) + <> in_disk = dist_sq < (radius[ictr]*1.05)**2 + <> matches = ( + (in_disk + and qbx_forced_limit == 0) + or (in_disk + and qbx_forced_limit != 0 + and qbx_forced_limit * center_side[ictr] > 0) + ) + + <> post_dist_sq = if(matches, dist_sq, HUGE) + end + <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq) + + tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1) + end + """) + + knl = lp.fix_parameters(knl, ambient_dim=2) + knl = lp.add_and_infer_dtypes(knl, { + "tgt,center,radius,HUGE": np.float32, + "center_side,qbx_forced_limit": np.int32, + }) + + lp.auto_test_vs_ref(knl, cl_ctx, knl, parameters={ + "HUGE": 1e20, "ncenters": 200, "ntargets": 300, + "qbx_forced_limit": 1}) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From d5222cf99108c2f017caf1f324c180d8916044a1 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 19:07:45 -0500 Subject: [PATCH 07/18] Change tuple assignment to be implemented directly by the code generator, so that we can avoid generating a dummy C function. --- loopy/library/tuple.py | 32 +------------------------------- loopy/target/c/__init__.py | 33 +++++++++++++++++++++++++++++++++ loopy/target/opencl.py | 2 -- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/loopy/library/tuple.py b/loopy/library/tuple.py index dd6b553eb..ce2865ff5 100644 --- a/loopy/library/tuple.py +++ b/loopy/library/tuple.py @@ -29,40 +29,10 @@ def tuple_function_mangler(kernel, name, arg_dtypes): if name == "make_tuple": from loopy.kernel.data import CallMangleInfo return CallMangleInfo( - target_name=tuple_function_name(*arg_dtypes), + target_name="loopy_make_tuple", result_dtypes=arg_dtypes, arg_dtypes=arg_dtypes) return None - -def tuple_function_name(dtype0, dtype1): - return "loopy_tuple_%s_%s" % ( - dtype0.numpy_dtype.type.__name__, dtype1.numpy_dtype.type.__name__) - - -def get_tuple_preamble(kernel, func_id, arg_dtypes): - name = tuple_function_name(*arg_dtypes) - return (name, """ - inline %(t0)s %(name)s(%(t0)s i0, %(t1)s i1, %(t1)s *o1) - { - *o1 = i1; - return i0; - } - """ % dict(name=name, - t0=kernel.target.dtype_to_typename(arg_dtypes[0]), - t1=kernel.target.dtype_to_typename(arg_dtypes[1]))) - - -def tuple_preamble_generator(preamble_info): - from loopy.target.opencl import OpenCLTarget - - for func in preamble_info.seen_functions: - if func.name == "make_tuple": - if not isinstance(preamble_info.kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") - - yield get_tuple_preamble(preamble_info.kernel, func.name, - func.arg_dtypes) - # vim: fdm=marker diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e4835a363..ed1ba1ce9 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -648,6 +648,36 @@ class CASTBuilder(ASTBuilderBase): lhs_expr, rhs_expr, lhs_dtype): raise NotImplementedError("atomic updates in %s" % type(self).__name__) + def emit_tuple_assignment(self, codegen_state, insn): + ecm = codegen_state.expression_to_code_mapper + + parameters = insn.expression.parameters + parameter_dtypes = tuple(ecm.infer_type(par) for par in parameters) + + from cgen import Assign, block_if_necessary + assignments = [] + + for i, (assignee, tgt_dtype) in enumerate( + zip(insn.assignees, parameter_dtypes)): + if tgt_dtype != ecm.infer_type(assignee): + raise LoopyError("type mismatch in %d'th (0-based) left-hand " + "side of instruction '%s'" % (i, insn.id)) + + lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None) + assignee_var_name = insn.assignee_var_names()[i] + lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) + lhs_dtype = lhs_var.dtype + + from loopy.expression import dtype_to_type_context + rhs_type_context = dtype_to_type_context( + codegen_state.kernel.target, lhs_dtype) + rhs_code = ecm(parameters[i], prec=PREC_NONE, + type_context=rhs_type_context, needed_dtype=lhs_dtype) + + assignments.append(Assign(lhs_code, rhs_code)) + + return block_if_necessary(assignments) + def emit_multiple_assignment(self, codegen_state, insn): ecm = codegen_state.expression_to_code_mapper @@ -674,6 +704,9 @@ class CASTBuilder(ASTBuilderBase): assert mangle_result.arg_dtypes is not None + if mangle_result.target_name == "loopy_make_tuple": + return self.emit_tuple_assignment(codegen_state, insn) + from loopy.expression import dtype_to_type_context c_parameters = [ ecm(par, PREC_NONE, diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index e70acfeab..a5f7562c4 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -390,13 +390,11 @@ class OpenCLCASTBuilder(CASTBuilder): def preamble_generators(self): from loopy.library.reduction import reduction_preamble_generator - from loopy.library.tuple import tuple_preamble_generator return ( super(OpenCLCASTBuilder, self).preamble_generators() + [ opencl_preamble_generator, reduction_preamble_generator, - tuple_preamble_generator ]) # }}} -- GitLab From 542e9756758f40c58f0503a4d0c7c993ec3137c2 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 20:18:39 -0500 Subject: [PATCH 08/18] test_nested_scan(): Declare the dtype of tmp (I think it's fair that it fails without this.) --- test/test_scan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_scan.py b/test/test_scan.py index 08754819c..c225c2c1c 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -182,6 +182,7 @@ def test_nested_scan(ctx_factory, i_tag, j_tag): knl = lp.fix_parameters(knl, n=10) knl = lp.tag_inames(knl, dict(i=i_tag, j=j_tag)) + knl = lp.add_dtypes(knl, dict(tmp=int)) knl = lp.realize_reduction(knl, force_scan=True) print(knl) -- GitLab From 45332d8f857b34da1efeb159d7f52b3b63656c1a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 20:19:37 -0500 Subject: [PATCH 09/18] Tuple private scalar assignment hack: Skip make_tuple(). --- loopy/preprocess.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index de7f2b593..c1492789f 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -773,6 +773,21 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): FIRST_POINTER_ASSIGNEE_IDX = 1 # noqa + param_dtypes = tuple(type_inf_mapper(param) + for param in insn.expression.parameters) + + func_id = insn.expression.function + + from pymbolic.primitives import Variable + if isinstance(func_id, Variable): + func_id = func_id.name + + mangle_result = kernel.mangle_function(func_id, param_dtypes) + + if mangle_result.target_name == "loopy_make_tuple": + # Skip loopy_make_tuple. This is lowered without a function call. + continue + assignee_dtypes, = type_inf_mapper( insn.expression, return_tuple=True, return_dtype_set=True) @@ -806,7 +821,8 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_temporaries[new_assignee_name] = ( TemporaryVariable( name=new_assignee_name, - dtype=assignee_dtypes[assignee_nr], + dtype=assignee_dtypes[assignee_nr].with_target( + kernel.target), scope=temp_var_scope.PRIVATE)) from pymbolic import var @@ -985,12 +1001,18 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, var_name_gen(id + "_arg" + str(i)) for i in range(nresults)] - for name in temp_var_names: + from loopy.type_inference import infer_arg_and_reduction_dtypes_for_reduction_expression + + _, reduction_dtypes = ( + infer_arg_and_reduction_dtypes_for_reduction_expression( + temp_kernel, expr, unknown_types_ok=False)) + + for name, dtype in zip(temp_var_names, reduction_dtypes): from loopy.kernel.data import TemporaryVariable, temp_var_scope new_temporary_variables[name] = TemporaryVariable( name=name, shape=(), - dtype=lp.auto, + dtype=dtype, scope=temp_var_scope.PRIVATE) from pymbolic import var -- GitLab From 31a2bfb021b31def8d0ccf41f6ba1939d1ea310a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 20:20:13 -0500 Subject: [PATCH 10/18] Fix dtype getting for reduction mangler. --- loopy/library/reduction.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 962b31681..bd085b7e8 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -258,7 +258,8 @@ class SegmentedProductReductionOperation(_SegmentedScalarReductionOperation): def get_segmented_function_preamble(kernel, func_id, arg_dtypes): op = func_id.reduction_op - scalar_dtype, segment_flag_dtype = arg_dtypes + scalar_dtype = arg_dtypes[0] + segment_flag_dtype = arg_dtypes[1] prefix = op.prefix(scalar_dtype, segment_flag_dtype) return (prefix, """ @@ -444,7 +445,8 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op - scalar_dtype, segment_flag_dtype = arg_dtypes + scalar_dtype = arg_dtypes[0] + segment_flag_dtype = arg_dtypes[1] from loopy.kernel.data import CallMangleInfo return CallMangleInfo( -- GitLab From 1e0696efb71c3e0aa83f0dacd3d50afdef7c9825 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 20:20:35 -0500 Subject: [PATCH 11/18] Type inference for reduction: Make sure reduction arg types have a target too. --- loopy/type_inference.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 3fb165ead..7b3a67c6b 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -671,6 +671,11 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( raise LoopyError("failed to determine type of accumulator for " "reduction '%s'" % expr) + arg_dtypes = tuple( + dt.with_target(kernel.target) + if dt is not lp.auto else dt + for dt in arg_dtypes) + reduction_dtypes = expr.operation.result_dtypes(kernel, *arg_dtypes) reduction_dtypes = tuple( dt.with_target(kernel.target) -- GitLab From 28800337b5fed0b72ff9d1bfb7706faa2a2b3048 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 20:23:48 -0500 Subject: [PATCH 12/18] Move complicated argmin test inside test_loopy to reduce the chance of a merge conflict. --- test/test_loopy.py | 82 +++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 3ac857478..48cb6980a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2087,6 +2087,47 @@ def test_integer_reduction(ctx_factory): assert function(out) +def test_complicated_argmin_reduction(ctx_factory): + cl_ctx = ctx_factory() + knl = lp.make_kernel( + "{[ictr,itgt,idim]: " + "0<=itgt dist_sq = sum(idim, + (tgt[idim,itgt] - center[idim,ictr])**2) + <> in_disk = dist_sq < (radius[ictr]*1.05)**2 + <> matches = ( + (in_disk + and qbx_forced_limit == 0) + or (in_disk + and qbx_forced_limit != 0 + and qbx_forced_limit * center_side[ictr] > 0) + ) + + <> post_dist_sq = if(matches, dist_sq, HUGE) + end + <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq) + + tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1) + end + """) + + knl = lp.fix_parameters(knl, ambient_dim=2) + knl = lp.add_and_infer_dtypes(knl, { + "tgt,center,radius,HUGE": np.float32, + "center_side,qbx_forced_limit": np.int32, + }) + + lp.auto_test_vs_ref(knl, cl_ctx, knl, parameters={ + "HUGE": 1e20, "ncenters": 200, "ntargets": 300, + "qbx_forced_limit": 1}) + + def test_nosync_option_parsing(): knl = lp.make_kernel( "{[i]: 0 <= i < 10}", @@ -2335,47 +2376,6 @@ def test_kernel_var_name_generator(): assert vng("b") != "b" -def test_complex_argmin(ctx_factory): - cl_ctx = ctx_factory() - knl = lp.make_kernel( - "{[ictr,itgt,idim]: " - "0<=itgt dist_sq = sum(idim, - (tgt[idim,itgt] - center[idim,ictr])**2) - <> in_disk = dist_sq < (radius[ictr]*1.05)**2 - <> matches = ( - (in_disk - and qbx_forced_limit == 0) - or (in_disk - and qbx_forced_limit != 0 - and qbx_forced_limit * center_side[ictr] > 0) - ) - - <> post_dist_sq = if(matches, dist_sq, HUGE) - end - <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq) - - tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1) - end - """) - - knl = lp.fix_parameters(knl, ambient_dim=2) - knl = lp.add_and_infer_dtypes(knl, { - "tgt,center,radius,HUGE": np.float32, - "center_side,qbx_forced_limit": np.int32, - }) - - lp.auto_test_vs_ref(knl, cl_ctx, knl, parameters={ - "HUGE": 1e20, "ncenters": 200, "ntargets": 300, - "qbx_forced_limit": 1}) - - if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 2de504011b315dae9a793f36ad2adbea8375c10f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 15 Jul 2017 20:24:28 -0500 Subject: [PATCH 13/18] Pacify flake8. --- loopy/preprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index c1492789f..8172051cc 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -1001,7 +1001,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, var_name_gen(id + "_arg" + str(i)) for i in range(nresults)] - from loopy.type_inference import infer_arg_and_reduction_dtypes_for_reduction_expression + from loopy.type_inference import ( + infer_arg_and_reduction_dtypes_for_reduction_expression) _, reduction_dtypes = ( infer_arg_and_reduction_dtypes_for_reduction_expression( -- GitLab From e203e212d04e74714316a86be34a1813cf426568 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 16 Jul 2017 14:43:37 -0500 Subject: [PATCH 14/18] Remove tuple.py --- loopy/library/function.py | 12 +++++++++++- loopy/library/tuple.py | 38 -------------------------------------- 2 files changed, 11 insertions(+), 39 deletions(-) delete mode 100644 loopy/library/tuple.py diff --git a/loopy/library/function.py b/loopy/library/function.py index f3d14516c..9d557ac9f 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -25,7 +25,6 @@ THE SOFTWARE. def default_function_mangler(kernel, name, arg_dtypes): from loopy.library.reduction import reduction_function_mangler - from loopy.library.tuple import tuple_function_mangler manglers = [reduction_function_mangler, tuple_function_mangler] for mangler in manglers: @@ -46,4 +45,15 @@ def single_arg_function_mangler(kernel, name, arg_dtypes): return None +def tuple_function_mangler(kernel, name, arg_dtypes): + if name == "make_tuple": + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="loopy_make_tuple", + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + return None + + # vim: foldmethod=marker diff --git a/loopy/library/tuple.py b/loopy/library/tuple.py deleted file mode 100644 index ce2865ff5..000000000 --- a/loopy/library/tuple.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, division, print_function - -__copyright__ = "Copyright (C) 2017 Matt Wala" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -from loopy.diagnostic import LoopyError - - -def tuple_function_mangler(kernel, name, arg_dtypes): - if name == "make_tuple": - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="loopy_make_tuple", - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - return None - -# vim: fdm=marker -- GitLab From 83428f328e9ef433f9422809562d82e6c52d8819 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 19 Jul 2017 01:04:27 -0500 Subject: [PATCH 15/18] Be less strict about data types in tuples / reductions. --- loopy/library/reduction.py | 6 ++++-- loopy/target/c/__init__.py | 15 ++++----------- test/test_scan.py | 1 - 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index bd085b7e8..3c5f4a142 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -123,7 +123,8 @@ class ScalarReductionOperation(ReductionOperation): class SumReductionOperation(ScalarReductionOperation): def neutral_element(self, dtype): - return dtype.numpy_dtype.type(0) + # FIXME: Document that we always use an int here. + return 0 def __call__(self, dtype, operand1, operand2): return operand1 + operand2 @@ -131,7 +132,8 @@ class SumReductionOperation(ScalarReductionOperation): class ProductReductionOperation(ScalarReductionOperation): def neutral_element(self, dtype): - return dtype.numpy_dtype.type(1) + # FIXME: Document that we always use an int here. + return 1 def __call__(self, dtype, operand1, operand2): return operand1 * operand2 diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index ed1ba1ce9..e9457233f 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -651,18 +651,11 @@ class CASTBuilder(ASTBuilderBase): def emit_tuple_assignment(self, codegen_state, insn): ecm = codegen_state.expression_to_code_mapper - parameters = insn.expression.parameters - parameter_dtypes = tuple(ecm.infer_type(par) for par in parameters) - from cgen import Assign, block_if_necessary assignments = [] - for i, (assignee, tgt_dtype) in enumerate( - zip(insn.assignees, parameter_dtypes)): - if tgt_dtype != ecm.infer_type(assignee): - raise LoopyError("type mismatch in %d'th (0-based) left-hand " - "side of instruction '%s'" % (i, insn.id)) - + for i, (assignee, parameter) in enumerate( + zip(insn.assignees, insn.expression.parameters)): lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None) assignee_var_name = insn.assignee_var_names()[i] lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) @@ -671,8 +664,8 @@ class CASTBuilder(ASTBuilderBase): from loopy.expression import dtype_to_type_context rhs_type_context = dtype_to_type_context( codegen_state.kernel.target, lhs_dtype) - rhs_code = ecm(parameters[i], prec=PREC_NONE, - type_context=rhs_type_context, needed_dtype=lhs_dtype) + rhs_code = ecm(parameter, prec=PREC_NONE, + type_context=rhs_type_context, needed_dtype=lhs_dtype) assignments.append(Assign(lhs_code, rhs_code)) diff --git a/test/test_scan.py b/test/test_scan.py index c225c2c1c..08754819c 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -182,7 +182,6 @@ def test_nested_scan(ctx_factory, i_tag, j_tag): knl = lp.fix_parameters(knl, n=10) knl = lp.tag_inames(knl, dict(i=i_tag, j=j_tag)) - knl = lp.add_dtypes(knl, dict(tmp=int)) knl = lp.realize_reduction(knl, force_scan=True) print(knl) -- GitLab From 37ca145f1c588fd38179d6f20f52f91d14a78de2 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 19 Jul 2017 01:27:10 -0500 Subject: [PATCH 16/18] Undo changes to preprocess. They were not needed. --- loopy/preprocess.py | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 8172051cc..ced1aaaa1 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -749,9 +749,6 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): # }}} - from loopy.type_inference import TypeInferenceMapper - type_inf_mapper = TypeInferenceMapper(kernel) - from loopy.kernel.instruction import CallInstruction for insn in kernel.instructions: if not isinstance(insn, CallInstruction): @@ -773,24 +770,6 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): FIRST_POINTER_ASSIGNEE_IDX = 1 # noqa - param_dtypes = tuple(type_inf_mapper(param) - for param in insn.expression.parameters) - - func_id = insn.expression.function - - from pymbolic.primitives import Variable - if isinstance(func_id, Variable): - func_id = func_id.name - - mangle_result = kernel.mangle_function(func_id, param_dtypes) - - if mangle_result.target_name == "loopy_make_tuple": - # Skip loopy_make_tuple. This is lowered without a function call. - continue - - assignee_dtypes, = type_inf_mapper( - insn.expression, return_tuple=True, return_dtype_set=True) - for assignee_nr, assignee_var_name, assignee in zip( range(FIRST_POINTER_ASSIGNEE_IDX, len(assignees)), assignee_var_names[FIRST_POINTER_ASSIGNEE_IDX:], @@ -818,11 +797,11 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): newly_added_assignments_ids.add(new_assignment_id) + import loopy as lp new_temporaries[new_assignee_name] = ( TemporaryVariable( name=new_assignee_name, - dtype=assignee_dtypes[assignee_nr].with_target( - kernel.target), + dtype=lp.auto, scope=temp_var_scope.PRIVATE)) from pymbolic import var @@ -1001,19 +980,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, var_name_gen(id + "_arg" + str(i)) for i in range(nresults)] - from loopy.type_inference import ( - infer_arg_and_reduction_dtypes_for_reduction_expression) - - _, reduction_dtypes = ( - infer_arg_and_reduction_dtypes_for_reduction_expression( - temp_kernel, expr, unknown_types_ok=False)) - - for name, dtype in zip(temp_var_names, reduction_dtypes): + for name in temp_var_names: from loopy.kernel.data import TemporaryVariable, temp_var_scope new_temporary_variables[name] = TemporaryVariable( name=name, shape=(), - dtype=dtype, + dtype=lp.auto, scope=temp_var_scope.PRIVATE) from pymbolic import var -- GitLab From 896e16df432ca08bf41b960fef9ec8742ab712cd Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 19 Jul 2017 01:27:47 -0500 Subject: [PATCH 17/18] Undo changes to type inference. They were not needed. --- loopy/type_inference.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 45da8eb3e..409cbbc5e 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -443,7 +443,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): result_i = comp_dtype_set break - assert found, var_name + assert found if result_i is not None: result.append(result_i) @@ -672,11 +672,6 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( raise LoopyError("failed to determine type of accumulator for " "reduction '%s'" % expr) - arg_dtypes = tuple( - dt.with_target(kernel.target) - if dt is not lp.auto else dt - for dt in arg_dtypes) - reduction_dtypes = expr.operation.result_dtypes(kernel, *arg_dtypes) reduction_dtypes = tuple( dt.with_target(kernel.target) -- GitLab From c106a828aa2417ea30a35800e8ba839f484788d8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 19 Jul 2017 01:30:57 -0500 Subject: [PATCH 18/18] Add a name attribute to ReductionOpFunction. --- loopy/library/reduction.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 3c5f4a142..0e5a093b7 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -202,6 +202,10 @@ class ReductionOpFunction(FunctionIdentifier): def __getinitargs__(self): return (self.reduction_op,) + @property + def name(self): + return self.__class__.__name__ + # }}} -- GitLab