From 1f66182f5448e6f02776f2e7fd18c8bbc56d14bd Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 9 Feb 2018 15:15:20 -0600 Subject: [PATCH 001/116] Added the regexp for the vectorized function syntax --- loopy/kernel/creation.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 4a08c28bd..d3ec80e7f 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -413,6 +413,17 @@ INSN_RE = re.compile( r"\s*?" r"(?:\{(?P.+)\}\s*)?$") +VEC_INSN_RE = re.compile( + r"^" + r"\s*" + r"(?P[^{]+?)" + r"\s*(?[^{]+?)" + r"\s*(?.+:.+?)\)" + r"\s*?" + r"(?:\{(?P.+)\}\s*)?$") + EMPTY_LHS_INSN_RE = re.compile( r"^" r"\s*" @@ -433,6 +444,7 @@ SUBST_RE = re.compile( r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$") + def check_illegal_options(insn_options, insn_type): illegal_options = [] if insn_type not in ['gbarrier', 'lbarrier']: @@ -946,7 +958,7 @@ def parse_instructions(instructions, defines): substitutions[subst.name] = subst continue - insn_match = INSN_RE.match(insn) + insn_match = VEC_INSN_RE.match(insn) if insn_match is not None: insn, insn_inames_to_dup = parse_insn( insn_match.groupdict(), insn_options_stack[-1]) @@ -954,6 +966,14 @@ def parse_instructions(instructions, defines): inames_to_dup.append(insn_inames_to_dup) continue + insn_match = INSN_RE.match(insn) + if insn_match is not None: + insn, insn_inames_to_dup = parse_insn( + insn_match.groupdict(), insn_options_stack[-1]) + new_instructions.append(insn) + inames_to_dup.append(insn_inames_to_dup) + continue + insn_match = EMPTY_LHS_INSN_RE.match(insn) if insn_match is not None: insn, insn_inames_to_dup = parse_insn( -- GitLab From cba1e66c3acfcd48c9e22413462abdac8ae97b66 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 9 Feb 2018 17:43:55 -0600 Subject: [PATCH 002/116] Added support to conver the func over arrays to translate to normal iname type things. --- loopy/kernel/creation.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index d3ec80e7f..b6ee21d06 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -549,6 +549,38 @@ def parse_insn(groups, insn_options): # }}} +# {{{ parse_func_over_array + +def parse_func_array_insn(groups, insn_options): + # def parse_insn(groups, insn_options): + """ + This will take in the options for the function call over array and will + convert that to an assignment. This would just involve changing the options. + + """ + function_re = re.compile("\s*(?P.+\s*)\[(?P.+)\]\s*") + func_match = function_re.match(groups["func"]) + new_group = {} + new_group['lhs'] = groups['lhs'] + func_name = func_match.group('func_name') + func_inames = func_match.group('inames').split(",") + substituted_rhs = list(groups['rhs']) + iname_count = 0 + for i, k in enumerate(substituted_rhs): + if k == ':': + substituted_rhs[i] = func_inames[iname_count] + iname_count += 1 + assert iname_count == len(func_inames), ( + 'The number of axis over which' + 'function is to be deployed must equal the given number of' + 'arguments to the function') + new_group['rhs'] = '{func_name}({expr})}'.format( + func_name=func_name, + expr="".join(substituted_rhs)) + return parse_insn(new_group, insn_options) + +# }}} + # {{{ parse_subst_rule -- GitLab From fcaf23c6800f668f1611d853780a96b06e7fca07 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 9 Feb 2018 23:44:15 -0600 Subject: [PATCH 003/116] Still does not do it correctly. --- loopy/kernel/creation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index b6ee21d06..a76ffb5b7 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -574,9 +574,11 @@ def parse_func_array_insn(groups, insn_options): 'The number of axis over which' 'function is to be deployed must equal the given number of' 'arguments to the function') - new_group['rhs'] = '{func_name}({expr})}'.format( + new_group['rhs'] = '{func_name}({expr})'.format( func_name=func_name, expr="".join(substituted_rhs)) + print(new_group['lhs']) + print(new_group['rhs']) return parse_insn(new_group, insn_options) # }}} @@ -992,7 +994,7 @@ def parse_instructions(instructions, defines): insn_match = VEC_INSN_RE.match(insn) if insn_match is not None: - insn, insn_inames_to_dup = parse_insn( + insn, insn_inames_to_dup = parse_func_array_insn( insn_match.groupdict(), insn_options_stack[-1]) new_instructions.append(insn) inames_to_dup.append(insn_inames_to_dup) -- GitLab From 27aebf0d6becb8894577075c3406b79a2d100e26 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 10 Feb 2018 12:30:36 -0600 Subject: [PATCH 004/116] got the thing working. --- loopy/kernel/creation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a76ffb5b7..e6f0855c7 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -562,6 +562,7 @@ def parse_func_array_insn(groups, insn_options): func_match = function_re.match(groups["func"]) new_group = {} new_group['lhs'] = groups['lhs'] + new_group['options'] = groups['options'] func_name = func_match.group('func_name') func_inames = func_match.group('inames').split(",") substituted_rhs = list(groups['rhs']) -- GitLab From 8e4613c3fb6a9c7187558d51f14ab796d0810cb9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 10 Feb 2018 13:11:18 -0600 Subject: [PATCH 005/116] changed the name from VEC_RE to ARRAY_FUNC --- loopy/kernel/creation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index e6f0855c7..9438cf3cf 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -413,7 +413,7 @@ INSN_RE = re.compile( r"\s*?" r"(?:\{(?P.+)\}\s*)?$") -VEC_INSN_RE = re.compile( +ARRAY_FUNC_RE = re.compile( r"^" r"\s*" r"(?P[^{]+?)" @@ -993,7 +993,7 @@ def parse_instructions(instructions, defines): substitutions[subst.name] = subst continue - insn_match = VEC_INSN_RE.match(insn) + insn_match = ARRAY_FUNC_RE.match(insn) if insn_match is not None: insn, insn_inames_to_dup = parse_func_array_insn( insn_match.groupdict(), insn_options_stack[-1]) -- GitLab From 93b279733c86969a191dcaefc321cd20dec1dcee Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 10 Feb 2018 20:17:27 -0600 Subject: [PATCH 006/116] Removed the mistake that had been done. No parsing is needed --- loopy/kernel/creation.py | 54 ---------------------------------------- 1 file changed, 54 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 9438cf3cf..d0ee20e56 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -413,17 +413,6 @@ INSN_RE = re.compile( r"\s*?" r"(?:\{(?P.+)\}\s*)?$") -ARRAY_FUNC_RE = re.compile( - r"^" - r"\s*" - r"(?P[^{]+?)" - r"\s*(?[^{]+?)" - r"\s*(?.+:.+?)\)" - r"\s*?" - r"(?:\{(?P.+)\}\s*)?$") - EMPTY_LHS_INSN_RE = re.compile( r"^" r"\s*" @@ -549,41 +538,6 @@ def parse_insn(groups, insn_options): # }}} -# {{{ parse_func_over_array - -def parse_func_array_insn(groups, insn_options): - # def parse_insn(groups, insn_options): - """ - This will take in the options for the function call over array and will - convert that to an assignment. This would just involve changing the options. - - """ - function_re = re.compile("\s*(?P.+\s*)\[(?P.+)\]\s*") - func_match = function_re.match(groups["func"]) - new_group = {} - new_group['lhs'] = groups['lhs'] - new_group['options'] = groups['options'] - func_name = func_match.group('func_name') - func_inames = func_match.group('inames').split(",") - substituted_rhs = list(groups['rhs']) - iname_count = 0 - for i, k in enumerate(substituted_rhs): - if k == ':': - substituted_rhs[i] = func_inames[iname_count] - iname_count += 1 - assert iname_count == len(func_inames), ( - 'The number of axis over which' - 'function is to be deployed must equal the given number of' - 'arguments to the function') - new_group['rhs'] = '{func_name}({expr})'.format( - func_name=func_name, - expr="".join(substituted_rhs)) - print(new_group['lhs']) - print(new_group['rhs']) - return parse_insn(new_group, insn_options) - -# }}} - # {{{ parse_subst_rule @@ -993,14 +947,6 @@ def parse_instructions(instructions, defines): substitutions[subst.name] = subst continue - insn_match = ARRAY_FUNC_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_func_array_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - inames_to_dup.append(insn_inames_to_dup) - continue - insn_match = INSN_RE.match(insn) if insn_match is not None: insn, insn_inames_to_dup = parse_insn( -- GitLab From 3ddece002794e9c3dd70155b49b9638ddd1f0b27 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Feb 2018 13:01:30 -0600 Subject: [PATCH 007/116] Got started with teh ArrayCall --- loopy/library/array_call.py | 143 ++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 loopy/library/array_call.py diff --git a/loopy/library/array_call.py b/loopy/library/array_call.py new file mode 100644 index 000000000..90c2b4e0d --- /dev/null +++ b/loopy/library/array_call.py @@ -0,0 +1,143 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +from pymbolic.primitives import Expression + +from loopy.diagnostic import LoopyError + + +class ArrayCall(object): + """Subclasses of this type have to be hashable, picklable, and + equality-comparable. + """ + + def __init__(self, _func, _inames, _rhs): + assert isinstance(_inames, tuple) + assert isinstance(_rhs, Expression) + self.func = _func + self.rhs = _rhs + self.inames = _inames + + def result_dtypes(self, target, *arg_dtypes): + """ + :arg arg_dtypes: may be None if not known + :returns: None if not known, otherwise the returned type + """ + + raise NotImplementedError + + @property + def arg_count(self): + raise NotImplementedError + + def __hash__(self): + # Force subclasses to override + raise NotImplementedError + + def __eq__(self, other): + # Force subclasses to override + raise NotImplementedError + + def __call__(self, dtype, operand1, operand2): + raise NotImplementedError + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + result = "{ArrayCall(({inames}), {rhs}}".format( + inames=",".join(self.inames), + rhs=str(self.rhs)) + + return result + + +def array_call_mangler(kernel, func_id, arg_dtypes): + if isinstance(func_id, ArgExtOp): + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % func_id) + + op = func_id.reduction_op + scalar_dtype = arg_dtypes[0] + index_dtype = arg_dtypes[1] + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="%s_op" % op.prefix( + scalar_dtype, index_dtype), + result_dtypes=op.result_dtypes( + kernel, scalar_dtype, index_dtype), + arg_dtypes=( + scalar_dtype, + index_dtype, + scalar_dtype, + index_dtype), + ) + + elif isinstance(func_id, SegmentedOp): + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % func_id) + + op = func_id.reduction_op + scalar_dtype = arg_dtypes[0] + segment_flag_dtype = arg_dtypes[1] + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="%s_op" % op.prefix( + scalar_dtype, segment_flag_dtype), + result_dtypes=op.result_dtypes( + kernel, scalar_dtype, segment_flag_dtype), + arg_dtypes=( + scalar_dtype, + segment_flag_dtype, + scalar_dtype, + segment_flag_dtype), + ) + + return None + + +def array_call_preamble_generator(preamble_info): + from loopy.target.opencl import OpenCLTarget + + for func in preamble_info.seen_functions: + if isinstance(func.name, ArgExtOp): + if not isinstance(preamble_info.kernel.target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + yield get_argext_preamble(preamble_info.kernel, func.name, + func.arg_dtypes) + + elif isinstance(func.name, SegmentedOp): + if not isinstance(preamble_info.kernel.target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + yield get_segmented_function_preamble(preamble_info.kernel, func.name, + func.arg_dtypes) + +# vim: fdm=marker -- GitLab From c782529d70bd2f7adad956246d117c82b824c7fb Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Feb 2018 13:47:09 -0600 Subject: [PATCH 008/116] Declared the class `ArrayCall` --- loopy/symbolic.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 9e16c3a59..2a2eb3c58 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -597,6 +597,66 @@ class Reduction(p.Expression): mapper_method = intern("map_reduction") +class ArrayCall(p.Expression): + """Represents an function call over an array across :attr: `inames`. + + .. attribute:: func + + a string indicating the name of the function that is supposed to be + called. For ex. `'sin'`, `'cos'` + + .. attribute:: inames + + a tuple of inames across which the function is to be called. + + .. attribute:: rhs + + an instance of :class: `pymbolic.primtives.Expression` + """ + + init_arg_names = ("func", "inames", "rhs") + + def __init__(self, _func, _inames, _rhs): + + # {{{ Input Sanity Checks + assert isinstance(_func, str) + + assert isinstance(_inames, tuple) + for iname in _inames: + assert isinstance(iname, p.Variable) + + assert isinstance(_rhs, p.Expression) + # }}} + + self.func = _func + self.inames = _inames + self.rhs = _rhs + + def __getinitargs__(self): + return (self.func, self.inames, self.rhs) + + def get_hash(self): + return hash((self.__class__, self.func, self.inames, self.rhs)) + + def is_equal(self, other): + return (other.__class__ == self.__class__ + and other.func == self.func + and other.inames == self.inames + and other.rhs == self.rhs) + + def stringifier(self): + # FIXME: Add this functionality + raise NotImplementedError('Will add this in near future') + return StringifyMapper + + @property + @memoize_method + def inames_set(self): + return set(self.inames) + + mapper_method = intern("map_array_call") + + class LinearSubscript(p.Expression): """Represents a linear index into a multi-dimensional array, completely ignoring any multi-dimensional layout. -- GitLab From b8a6b9e75cc666b95b5ff621a1f794fd42faa1d1 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Feb 2018 15:29:35 -0600 Subject: [PATCH 009/116] Added more functionality for the ArrayCallOperation --- loopy/library/array_call.py | 116 ++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 59 deletions(-) diff --git a/loopy/library/array_call.py b/loopy/library/array_call.py index 90c2b4e0d..40db7ae55 100644 --- a/loopy/library/array_call.py +++ b/loopy/library/array_call.py @@ -28,14 +28,18 @@ from pymbolic.primitives import Expression from loopy.diagnostic import LoopyError -class ArrayCall(object): +class ArrayCallOperation(object): """Subclasses of this type have to be hashable, picklable, and equality-comparable. """ def __init__(self, _func, _inames, _rhs): + + # {{{ Sanity Check assert isinstance(_inames, tuple) assert isinstance(_rhs, Expression) + # }}} + self.func = _func self.rhs = _rhs self.inames = _inames @@ -74,70 +78,64 @@ class ArrayCall(object): return result -def array_call_mangler(kernel, func_id, arg_dtypes): - if isinstance(func_id, ArgExtOp): - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): - raise LoopyError("%s: only C-like targets supported for now" % func_id) - - op = func_id.reduction_op - scalar_dtype = arg_dtypes[0] - index_dtype = arg_dtypes[1] - - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="%s_op" % op.prefix( - scalar_dtype, index_dtype), - result_dtypes=op.result_dtypes( - kernel, scalar_dtype, index_dtype), - arg_dtypes=( - scalar_dtype, - index_dtype, - scalar_dtype, - index_dtype), - ) - - elif isinstance(func_id, SegmentedOp): - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): - raise LoopyError("%s: only C-like targets supported for now" % func_id) - - op = func_id.reduction_op - scalar_dtype = arg_dtypes[0] - segment_flag_dtype = arg_dtypes[1] - - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="%s_op" % op.prefix( - scalar_dtype, segment_flag_dtype), - result_dtypes=op.result_dtypes( - kernel, scalar_dtype, segment_flag_dtype), - arg_dtypes=( - scalar_dtype, - segment_flag_dtype, - scalar_dtype, - segment_flag_dtype), - ) +# {{{ array_call op registry - return None +_ARRAY_CALL_OP_PARSERS = [ + ] + + +def register_array_call_op(parser): + """ + Register a new :class:`ArrayCallOperation`. + :arg parser: A function that receives a string and returns + a subclass of ArrayCallOperation. + """ + import re + _ARRAY_CALL_OP_PARSERS.append(parser) -def array_call_preamble_generator(preamble_info): - from loopy.target.opencl import OpenCLTarget +def parse_array_call_op(name): + import re - for func in preamble_info.seen_functions: - if isinstance(func.name, ArgExtOp): - if not isinstance(preamble_info.kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") + array_call_op_match = re.match(r"^(?P[a-z]+)\[(?P.+)\]" + r"\((?P.+:.+)\)$", + name) - yield get_argext_preamble(preamble_info.kernel, func.name, - func.arg_dtypes) + if array_call_op_match: + func = array_call_op_match.group('func') + inames = array_call_op_match.group('iname') + rhs = array_call_op_match.group('rhs') - elif isinstance(func.name, SegmentedOp): - if not isinstance(preamble_info.kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") + iname_tuple = tuple(inames.split(",")) - yield get_segmented_function_preamble(preamble_info.kernel, func.name, - func.arg_dtypes) + return ArrayCallOperation(func, iname_tuple, rhs) + + return None + + +def array_call_mangler(kernel, func_name, arg_dtypes): + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % ( + func_name)) + + op = func_name + scalar_dtype = arg_dtypes[0] + index_dtype = arg_dtypes[1] + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="%s_op" % op.prefix( + scalar_dtype, index_dtype), + result_dtypes=op.result_dtypes( + kernel, scalar_dtype, index_dtype), + arg_dtypes=( + scalar_dtype, + index_dtype, + scalar_dtype, + index_dtype), + ) + + return None # vim: fdm=marker -- GitLab From 94d50c67534cd5f75d372856721c3f4b50f415b5 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Feb 2018 18:13:41 -0600 Subject: [PATCH 010/116] Frontend is almost established --- loopy/symbolic.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 2a2eb3c58..4225d34a9 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -106,6 +106,12 @@ class IdentityMapperMixin(object): def map_type_annotation(self, expr, *args): return type(expr)(expr.type, self.rec(expr.child)) + def map_array_call(self, expr, *args): + mapped_inames = [self.rec(p.Variable(iname), *args) for iname in expr.inames] + + new_inames + + map_type_cast = map_type_annotation map_linear_subscript = IdentityMapperBase.map_subscript @@ -1097,12 +1103,26 @@ class FunctionToPrimitiveMapper(IdentityMapper): return Reduction(operation, tuple(processed_inames), red_exprs, allow_simultaneous=allow_simultaneous) + def _parse_array_call(self, func, inames, rhs): + if isinstance(inames, P.Variable): + inames = (inames, ) + if not isinstance(inames, (tuple)) + raise TypeError("iname argument to array_call() must be a symbol " + "or a tuple of symbols") + + return ArrayCall(func, inames, rhs) + def map_call(self, expr): from loopy.library.reduction import parse_reduction_op if not isinstance(expr.function, p.Variable): return IdentityMapper.map_call(self, expr) + if isinstance(expr.function, p.Subscript): + func = expr.function.aggregate + return _parse_array_call(str(func.agrregate), func.index, + func.parameters) + name = expr.function.name if name == "cse": if len(expr.parameters) in [1, 2]: -- GitLab From ee1e295a0977c47d505a9e634d2c58cbda1cbfbe Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Feb 2018 18:25:12 -0600 Subject: [PATCH 011/116] Removes the parsing methods for array call --- loopy/library/array_call.py | 37 +++---------------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/loopy/library/array_call.py b/loopy/library/array_call.py index 40db7ae55..c7f6b0539 100644 --- a/loopy/library/array_call.py +++ b/loopy/library/array_call.py @@ -77,40 +77,7 @@ class ArrayCallOperation(object): return result - -# {{{ array_call op registry - -_ARRAY_CALL_OP_PARSERS = [ - ] - - -def register_array_call_op(parser): - """ - Register a new :class:`ArrayCallOperation`. - :arg parser: A function that receives a string and returns - a subclass of ArrayCallOperation. - """ - import re - _ARRAY_CALL_OP_PARSERS.append(parser) - - -def parse_array_call_op(name): - import re - - array_call_op_match = re.match(r"^(?P[a-z]+)\[(?P.+)\]" - r"\((?P.+:.+)\)$", - name) - - if array_call_op_match: - func = array_call_op_match.group('func') - inames = array_call_op_match.group('iname') - rhs = array_call_op_match.group('rhs') - - iname_tuple = tuple(inames.split(",")) - - return ArrayCallOperation(func, iname_tuple, rhs) - - return None +# {{{ Array Call Mangler def array_call_mangler(kernel, func_name, arg_dtypes): @@ -138,4 +105,6 @@ def array_call_mangler(kernel, func_name, arg_dtypes): return None +# }}} + # vim: fdm=marker -- GitLab From 85173a96f90fca744e5bb51a3cf088bb79b96e63 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 00:32:30 -0600 Subject: [PATCH 012/116] Fixed according to tracking/kaushik/#53 --- loopy/symbolic.py | 70 ++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 4225d34a9..6350d47c1 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -107,9 +107,8 @@ class IdentityMapperMixin(object): return type(expr)(expr.type, self.rec(expr.child)) def map_array_call(self, expr, *args): - mapped_inames = [self.rec(p.Variable(iname), *args) for iname in expr.inames] - - new_inames + return ArrayCall(expr.func_call, + expr.inames) map_type_cast = map_type_annotation @@ -169,10 +168,18 @@ class WalkMapper(WalkMapperBase): map_rule_argument = map_group_hw_index + def map_array_call(self, expr, *args): + if not self.visit(expr): + return + + return self.map_call(expr.func_call, *args) + class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant + # FXIME: Need to undertand what would this be + class CombineMapper(CombineMapperBase): def map_reduction(self, expr): @@ -180,6 +187,10 @@ class CombineMapper(CombineMapperBase): map_linear_subscript = CombineMapperBase.map_subscript + def map_reduction(self, expr): + # FIXME: Come up with a final definition + return self.rec(expr.rhs) + class SubstitutionMapper( CSECachingMapperMixin, SubstitutionMapperBase, IdentityMapperMixin): @@ -236,6 +247,12 @@ class StringifyMapper(StringifyMapperBase): from pymbolic.mapper.stringifier import PREC_NONE return "cast(%s, %s)" % (repr(expr.type), self.rec(expr.child, PREC_NONE)) + def map_array_call(self, expr, prec): + return "ArrayCall({func_name}[{inames}]({rhs}))".format( + func_name=expr.func_call.function.name, + inames=expr.inames, + rhs=expr.func_call.parameters) + class UnidirectionalUnifier(UnidirectionalUnifierBase): def map_reduction(self, expr, other, unis): @@ -264,6 +281,8 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase): from pymbolic.mapper.unifier import unify_many return unify_many(urecs, new_uni_record) + # FIXME: Need to add mapper method for array_call + class DependencyMapper(DependencyMapperBase): def map_group_hw_index(self, expr): @@ -288,6 +307,9 @@ class DependencyMapper(DependencyMapperBase): def map_loopy_function_identifier(self, expr): return set() + def map_array_call(self, expr, *args): + return self.map_call(expr.func_call) + map_linear_subscript = DependencyMapperBase.map_subscript def map_type_cast(self, expr): @@ -620,39 +642,35 @@ class ArrayCall(p.Expression): an instance of :class: `pymbolic.primtives.Expression` """ - init_arg_names = ("func", "inames", "rhs") + init_arg_names = ("func_call", "inames") - def __init__(self, _func, _inames, _rhs): + def __init__(self, _func_call, _inames): # {{{ Input Sanity Checks - assert isinstance(_func, str) + assert isinstance(_func_call, p.Call) assert isinstance(_inames, tuple) for iname in _inames: assert isinstance(iname, p.Variable) - assert isinstance(_rhs, p.Expression) + # FIXME: Need to assert that the # }}} - self.func = _func + self.func_call = _func_call self.inames = _inames - self.rhs = _rhs def __getinitargs__(self): - return (self.func, self.inames, self.rhs) + return (self.func_call, self.inames) def get_hash(self): - return hash((self.__class__, self.func, self.inames, self.rhs)) + return hash((self.__class__, self.func_call, self.inames)) def is_equal(self, other): return (other.__class__ == self.__class__ - and other.func == self.func - and other.inames == self.inames - and other.rhs == self.rhs) + and other.func_call == self.func_call + and other.inames == self.inames) def stringifier(self): - # FIXME: Add this functionality - raise NotImplementedError('Will add this in near future') return StringifyMapper @property @@ -1103,26 +1121,28 @@ class FunctionToPrimitiveMapper(IdentityMapper): return Reduction(operation, tuple(processed_inames), red_exprs, allow_simultaneous=allow_simultaneous) - def _parse_array_call(self, func, inames, rhs): - if isinstance(inames, P.Variable): + def _parse_array_call(self, func_call, inames): + # {{{ Input sanity check + if isinstance(inames, p.Variable): inames = (inames, ) - if not isinstance(inames, (tuple)) + if not isinstance(inames, (tuple)): raise TypeError("iname argument to array_call() must be a symbol " "or a tuple of symbols") + # }}} - return ArrayCall(func, inames, rhs) + return ArrayCall(func_call, inames) def map_call(self, expr): from loopy.library.reduction import parse_reduction_op + if isinstance(expr.function, p.Subscript): + func = expr.function + return self._parse_array_call(p.Call(func.aggregate, + expr.parameters), func.index) + if not isinstance(expr.function, p.Variable): return IdentityMapper.map_call(self, expr) - if isinstance(expr.function, p.Subscript): - func = expr.function.aggregate - return _parse_array_call(str(func.agrregate), func.index, - func.parameters) - name = expr.function.name if name == "cse": if len(expr.parameters) in [1, 2]: -- GitLab From 14027e83f1b8a030128899f75b71368c2376ffd3 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 00:47:40 -0600 Subject: [PATCH 013/116] Added the type inference for ArrayCall --- loopy/type_inference.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index fcf8f965b..50a54e348 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -399,6 +399,9 @@ class TypeInferenceMapper(CombineMapper): return [expr.operation.result_dtypes(self.kernel, rec_result)[0] for rec_result in rec_results] + def map_array_call(self, expr, *args): + return self.map_call(expr.func_call, *args) + # }}} -- GitLab From b2e32c5031200fa8b93101fa3a2e1225c14bbbd5 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 11:32:46 -0600 Subject: [PATCH 014/116] Started with ArrayCallInstrruction --- loopy/kernel/instruction.py | 160 ++++++++++++++++++++++++++++++++++-- 1 file changed, 152 insertions(+), 8 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 9d95408ac..c29ae5aab 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1028,6 +1028,144 @@ class CallInstruction(MultiAssignmentBase): # }}} +# {{{ instruction: function call over an array + +class ArrayCallInstruction(MultiAssignmentBase): + """An instruction capturing a function call on array. Unlike + :class:`CallInstruction`, this performs operations on arrays and is + eventually supposed to have invocations from "library-functions" + and this instruction supports functions with multiple return values. + + .. attribute:: assignees + + A :class:`tuple` of left-hand sides for the assignment + + .. attribute:: expression + + The expression that dictates the RHS + + The following attributes are only used until + :func:`loopy.make_kernel` is finished: + + .. attribute:: temp_var_types + + if not *None*, a type that will be assigned to the new temporary variable + created from the assignee + + .. automethod:: __init__ + """ + + fields = MultiAssignmentBase.fields | \ + set("assignees temp_var_types".split()) + pymbolic_fields = MultiAssignmentBase.pymbolic_fields | set(["assignees"]) + + def __init__(self, + assignees, expression, + id=None, + depends_on=None, + depends_on_is_final=None, + groups=None, + conflicts_with_groups=None, + no_sync_with=None, + within_inames_is_final=None, + within_inames=None, + boostable=None, boostable_into=None, tags=None, + temp_var_types=None, + priority=0, predicates=frozenset(), + insn_deps=None, insn_deps_is_final=None, + forced_iname_deps=None, + forced_iname_deps_is_final=None): + + super(ArrayCallInstruction, self).__init__( + id=id, + depends_on=depends_on, + depends_on_is_final=depends_on_is_final, + groups=groups, + conflicts_with_groups=conflicts_with_groups, + no_sync_with=no_sync_with, + within_inames_is_final=within_inames_is_final, + within_inames=within_inames, + boostable=boostable, + boostable_into=boostable_into, + priority=priority, + predicates=predicates, + tags=tags, + insn_deps=insn_deps, + insn_deps_is_final=insn_deps_is_final, + forced_iname_deps=forced_iname_deps, + forced_iname_deps_is_final=forced_iname_deps_is_final) + + from loopy.symbolic import ArrayCall + if not isinstance(expression, ArrayCall) and expression is not None: + raise LoopyError("'expression' argument to CallInstruction " + "must be a ArrayCall") + + from loopy.symbolic import parse + if isinstance(assignees, str): + assignees = parse(assignees) + if not isinstance(assignees, tuple): + raise LoopyError("'assignees' argument to CallInstruction " + "must be a tuple or a string parseable to a tuple" + "--got '%s'" % type(assignees).__name__) + + from pymbolic.primitives import Variable, Subscript + from loopy.symbolic import LinearSubscript + for assignee in assignees: + if not isinstance(assignee, (Variable, Subscript, LinearSubscript)): + raise LoopyError("invalid lvalue '%s'" % assignee) + + self.assignees = assignees + self.expression = expression + + if temp_var_types is None: + self.temp_var_types = (None,) * len(self.assignees) + else: + self.temp_var_types = temp_var_types + + # {{{ implement InstructionBase interface + + # FIXME: Didn't look into this. Maybe need to replace this + @memoize_method + def assignee_var_names(self): + return tuple(_get_assignee_var_name(a) for a in self.assignees) + + def assignee_subscript_deps(self): + return tuple( + _get_assignee_subscript_deps(a) + for a in self.assignees) + + def with_transformed_expressions(self, f, *args): + return self.copy( + assignees=f(self.assignees, *args), + expression=f(self.expression, *args), + predicates=frozenset( + f(pred, *args) for pred in self.predicates)) + + # }}} + + def __str__(self): + result = "%s: %s <- %s" % (self.id, + ", ".join(str(a) for a in self.assignees), + self.expression) + + options = self.get_str_options() + if options: + result += " {%s}" % (": ".join(options)) + + if self.predicates: + result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates) + return result + + @property + def atomicity(self): + # Function calls can impossibly be atomic, and even the result assignment + # is troublesome, especially in the case of multiple results. Avoid the + # issue altogether by disallowing atomicity. + return () + +# }}} + + def make_assignment(assignees, expression, temp_var_types=None, **kwargs): if len(assignees) > 1 or len(assignees) == 0: atomicity = kwargs.pop("atomicity", ()) @@ -1036,16 +1174,22 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): "left-hand side not supported") from pymbolic.primitives import Call - from loopy.symbolic import Reduction - if not isinstance(expression, (Call, Reduction)): + from loopy.symbolic import Reduction, ArrayCall + if not isinstance(expression, (ArrayCall, Call, Reduction)): raise LoopyError("right-hand side in multiple assignment must be " "function call or reduction, got: '%s'" % expression) - - return CallInstruction( - assignees=assignees, - expression=expression, - temp_var_types=temp_var_types, - **kwargs) + if isinstance(expression, (Call, Reduction)): + return CallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) + else: + return ArrayCallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) else: return Assignment( -- GitLab From f66feeeb413da9353cb99d10930ad2a0751d8e5a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 11:39:25 -0600 Subject: [PATCH 015/116] Added support for ArrayCallInstruction --- loopy/codegen/instruction.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index e590502fb..0e5dfbe73 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -75,12 +75,15 @@ def to_codegen_result( def generate_instruction_code(codegen_state, insn): kernel = codegen_state.kernel - from loopy.kernel.instruction import Assignment, CallInstruction, CInstruction + from loopy.kernel.instruction import (Assignment, CallInstruction, + CInstruction, ArrayCallInstruction) if isinstance(insn, Assignment): ast = generate_assignment_instruction_code(codegen_state, insn) elif isinstance(insn, CallInstruction): ast = generate_call_code(codegen_state, insn) + elif isinstance(insn, ArrayCallInstruction): + ast = generate_array_call_code(codegen_state,insn) elif isinstance(insn, CInstruction): ast = generate_c_instruction_code(codegen_state, insn) else: @@ -243,6 +246,30 @@ def generate_call_code(codegen_state, insn): return result +def generate_array_call_code(codegen_state, insn): + kernel = codegen_state.kernel + + # {{{ vectorization handling + + if codegen_state.vectorization_info: + if insn.atomicity: + raise Unvectorizable("atomic operation") + + # }}} + + result = codegen_state.ast_builder.emit_multiple_assignment( + codegen_state, insn) + + # {{{ tracing + + if kernel.options.trace_assignments or kernel.options.trace_assignment_values: + raise NotImplementedError("tracing of multi-output function calls") + + # }}} + + return result + + def generate_c_instruction_code(codegen_state, insn): kernel = codegen_state.kernel -- GitLab From 26161cc08ade4ff31273bc8ea287f7a6c7ac59c9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 11:41:40 -0600 Subject: [PATCH 016/116] No need to preprocess ArrayCallInstruction --- loopy/preprocess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index ad119e94e..c5fc25d92 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -749,9 +749,9 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): # }}} - from loopy.kernel.instruction import CallInstruction + from loopy.kernel.instruction import CallInstruction, ArrayCallInstruction for insn in kernel.instructions: - if not isinstance(insn, CallInstruction): + if not isinstance(insn, (CallInstruction, ArrayCallInstruction)): continue if len(insn.assignees) <= 1: -- GitLab From 732ad10862bb26944cd09a62a96289b2bfe43e4c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 11:49:33 -0600 Subject: [PATCH 017/116] Fixing typos --- loopy/kernel/instruction.py | 2 +- loopy/target/c/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index c29ae5aab..c55951e6d 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1032,7 +1032,7 @@ class CallInstruction(MultiAssignmentBase): class ArrayCallInstruction(MultiAssignmentBase): """An instruction capturing a function call on array. Unlike - :class:`CallInstruction`, this performs operations on arrays and is + :class:`ArrayCallInstruction`, this performs operations on arrays and is eventually supposed to have invocations from "library-functions" and this instruction supports functions with multiple return values. diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 177daa029..5624128d8 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -862,7 +862,7 @@ class CASTBuilder(ASTBuilderBase): assert mangle_result.arg_dtypes is not None if mangle_result.target_name == "loopy_make_tuple": - # This shorcut avoids actually having to emit a 'make_tuple' function. + # This shortcut avoids actually having to emit a 'make_tuple' function. return self.emit_tuple_assignment(codegen_state, insn) from loopy.expression import dtype_to_type_context -- GitLab From 7745d691fd007d9a1609def7736af1e94bf43b25 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 18:58:52 -0600 Subject: [PATCH 018/116] Mad sure that ArrayCallInstruction works --- loopy/kernel/instruction.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index c55951e6d..dba85ffb1 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1174,8 +1174,8 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): "left-hand side not supported") from pymbolic.primitives import Call - from loopy.symbolic import Reduction, ArrayCall - if not isinstance(expression, (ArrayCall, Call, Reduction)): + from loopy.symbolic import Reduction + if not isinstance(expression, (Call, Reduction)): raise LoopyError("right-hand side in multiple assignment must be " "function call or reduction, got: '%s'" % expression) if isinstance(expression, (Call, Reduction)): @@ -1184,12 +1184,17 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): expression=expression, temp_var_types=temp_var_types, **kwargs) - else: - return ArrayCallInstruction( - assignees=assignees, - expression=expression, - temp_var_types=temp_var_types, - **kwargs) + from loopy.symbolic import ArrayCall + if isinstance(expression, ArrayCall): + atomicity = kwargs.pop("atomicity", ()) + if atomicity: + raise LoopyError("atomic operations with more than one " + "left-hand side not supported") + return ArrayCallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) else: return Assignment( -- GitLab From bbbb34bb4089f22e92b0c405413b736041cc0342 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 19:02:28 -0600 Subject: [PATCH 019/116] Invoked ArrayCallInstruction at the top library level. --- loopy/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 5e8a3fb06..92f5056fb 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -40,7 +40,8 @@ from loopy.kernel.instruction import ( memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, InstructionBase, MultiAssignmentBase, Assignment, ExpressionInstruction, - CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction) + CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction, + ArrayCallInstruction) from loopy.kernel.data import ( auto, KernelArgument, -- GitLab From 8e16ccd016318425a34d5c065d7e13522d95a8ad Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 19:17:42 -0600 Subject: [PATCH 020/116] Added support for ArrayCall --- loopy/type_inference.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 50a54e348..7537186ec 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -399,8 +399,8 @@ class TypeInferenceMapper(CombineMapper): return [expr.operation.result_dtypes(self.kernel, rec_result)[0] for rec_result in rec_results] - def map_array_call(self, expr, *args): - return self.map_call(expr.func_call, *args) + def map_array_call(self, expr, return_tuple): + return self.map_call(expr.func_call, return_tuple) # }}} @@ -448,6 +448,24 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): assert found if result_i is not None: result.append(result_i) + elif isinstance(writer_insn, lp.ArrayCallInstruction): + return_dtype_set = type_inf_mapper(expr, return_tuple=True, + return_dtype_set=True) + + result = [] + for return_dtype_set in return_dtype_set: + result_i = None + found = False + for assignee, comp_dtype_set in zip( + writer_insn.assignee_var_names(), return_dtype_set): + if assignee == var_name: + found = True + result_i = comp_dtype_set + break + + assert found + if result_i is not None: + result.append(result_i) debug(" result: %s", result) -- GitLab From 0c2c00fb0065b8c545abba00ff533abbfdcf3346 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Feb 2018 22:34:57 -0600 Subject: [PATCH 021/116] ArrayCall Instruction works fine as an independent Assignment as well as a different Instruction type --- loopy/codegen/instruction.py | 2 +- loopy/kernel/instruction.py | 18 +++---- loopy/library/function.py | 14 ++++- loopy/target/c/__init__.py | 79 ++++++++++++++++++++++++++++ loopy/target/c/codegen/expression.py | 5 ++ loopy/type_inference.py | 4 +- 6 files changed, 109 insertions(+), 13 deletions(-) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 0e5dfbe73..aad755ee5 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -257,7 +257,7 @@ def generate_array_call_code(codegen_state, insn): # }}} - result = codegen_state.ast_builder.emit_multiple_assignment( + result = codegen_state.ast_builder.emit_array_call( codegen_state, insn) # {{{ tracing diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index dba85ffb1..f6c5c2b13 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1184,6 +1184,7 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): expression=expression, temp_var_types=temp_var_types, **kwargs) + from loopy.symbolic import ArrayCall if isinstance(expression, ArrayCall): atomicity = kwargs.pop("atomicity", ()) @@ -1196,15 +1197,14 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): temp_var_types=temp_var_types, **kwargs) - else: - return Assignment( - assignee=assignees[0], - expression=expression, - temp_var_type=( - temp_var_types[0] - if temp_var_types is not None - else None), - **kwargs) + return Assignment( + assignee=assignees[0], + expression=expression, + temp_var_type=( + temp_var_types[0] + if temp_var_types is not None + else None), + **kwargs) # {{{ c instruction diff --git a/loopy/library/function.py b/loopy/library/function.py index 9d557ac9f..634b10418 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -26,7 +26,8 @@ THE SOFTWARE. def default_function_mangler(kernel, name, arg_dtypes): from loopy.library.reduction import reduction_function_mangler - manglers = [reduction_function_mangler, tuple_function_mangler] + manglers = [reduction_function_mangler, tuple_function_mangler, + array_call_function_mangler] for mangler in manglers: result = mangler(kernel, name, arg_dtypes) if result is not None: @@ -56,4 +57,15 @@ def tuple_function_mangler(kernel, name, arg_dtypes): return None +def array_call_function_mangler(kernel, name, arg_dtypes): + if name[:5] == "array": + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name=name, + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + return None + + # vim: foldmethod=marker diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 5624128d8..dea103b9e 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -961,6 +961,85 @@ class CASTBuilder(ASTBuilderBase): from cgen import If return If(condition_str, ast) + def emit_array_call(self, codegen_state, insn): + ecm = codegen_state.expression_to_code_mapper + + from pymbolic.primitives import Variable + from pymbolic.mapper.stringifier import PREC_NONE + + func_id = "array_"+insn.expression.func_call.function.name + parameters = insn.expression.func_call.parameters + + assignee_var_descriptors = [ + codegen_state.kernel.get_var_descriptor(a) + for a in insn.assignee_var_names()] + + par_dtypes = tuple(ecm.infer_type(par) for par in parameters) + + mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) + if mangle_result is None: + raise RuntimeError("function '%s' unknown--" + "maybe you need to register a function mangler?" + % func_id) + + assert mangle_result.arg_dtypes is not None + + ''' + # Not exactly sure what this is doing will need to think about it + # later. + if mangle_result.target_name == "loopy_make_tuple": + # This shortcut avoids actually having to emit a 'make_tuple' function. + return self.emit_tuple_assignment(codegen_state, insn) + ''' + + from loopy.expression import dtype_to_type_context + c_parameters = [ + ecm(par, PREC_NONE, + dtype_to_type_context(self.target, tgt_dtype), + tgt_dtype).expr + for par, par_dtype, tgt_dtype in zip( + parameters, par_dtypes, mangle_result.arg_dtypes)] + + from loopy.codegen import SeenFunction + codegen_state.seen_functions.add( + SeenFunction(func_id, + mangle_result.target_name, + mangle_result.arg_dtypes)) + + from pymbolic import var + for i, (a, tgt_dtype) in enumerate( + zip(insn.assignees[1:], mangle_result.result_dtypes[1:])): + if tgt_dtype != ecm.infer_type(a): + raise LoopyError("type mismatch in %d'th (1-based) left-hand " + "side of instruction '%s'" % (i+1, insn.id)) + c_parameters.append( + # TODO Yuck: The "where-at function": &(...) + var("&")( + ecm(a, PREC_NONE, + dtype_to_type_context(self.target, tgt_dtype), + tgt_dtype).expr)) + + from pymbolic import var + result = var(mangle_result.target_name)(*c_parameters) + + # In case of no assignees, we are done + if len(mangle_result.result_dtypes) == 0: + from cgen import ExpressionStatement + return ExpressionStatement( + CExpression(self.get_c_expression_to_code_mapper(), result)) + + result = ecm.wrap_in_typecast( + mangle_result.result_dtypes[0], + assignee_var_descriptors[0].dtype, + result) + + lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) + + from cgen import Assign + return Assign( + lhs_code, + CExpression(self.get_c_expression_to_code_mapper(), result)) + # }}} def process_ast(self, node): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index c111a02b7..2be250121 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -693,6 +693,11 @@ class ExpressionToCExpressionMapper(IdentityMapper): def map_local_hw_index(self, expr, type_context): raise LoopyError("plain C does not have local hw axes") + def map_array_call(self, expr, type_context): + # The call came over here, that means which means that the ArrayCall + # has been converted to an assignment. + return self.map_call(expr.func_call, type_context) + # }}} diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 7537186ec..7a7c2b901 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -399,7 +399,7 @@ class TypeInferenceMapper(CombineMapper): return [expr.operation.result_dtypes(self.kernel, rec_result)[0] for rec_result in rec_results] - def map_array_call(self, expr, return_tuple): + def map_array_call(self, expr, return_tuple=False): return self.map_call(expr.func_call, return_tuple) # }}} @@ -466,7 +466,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): assert found if result_i is not None: result.append(result_i) - + debug(" result: %s", result) dtype_sets.append(result) -- GitLab From 82e5557b28a0d3bae852e3afb27196542767868e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Feb 2018 12:55:57 -0600 Subject: [PATCH 022/116] Added some exceptions for ArrayCallInstruction --- loopy/check.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index 7e661b566..691783e86 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -131,10 +131,12 @@ def check_for_double_use_of_hw_axes(kernel): def check_for_inactive_iname_access(kernel): + from loopy.kernel.instruction import ArrayCallInstruction for insn in kernel.instructions: expression_inames = insn.read_dependency_names() & kernel.all_inames() - if not expression_inames <= kernel.insn_inames(insn): + if not expression_inames <= kernel.insn_inames(insn) and ( + not isinstance(insn, ArrayCallInstruction)): raise LoopyError( "instruction '%s' references " "inames '%s' that the instruction does not depend on" @@ -170,9 +172,12 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): from loopy.kernel.data import ConcurrentTag + from loopy.kernel.instruction import ArrayCallInstruction iname_to_tag = kernel.iname_to_tag.get for insn in kernel.instructions: + if isinstance(insn, ArrayCallInstruction): + continue for assignee_name, assignee_indices in zip( insn.assignee_var_names(), insn.assignee_subscript_deps()): -- GitLab From d820a75c87269c6f639e990c8e50a41abf09c340 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Feb 2018 13:56:00 -0600 Subject: [PATCH 023/116] Now need to add the size parameters for array_sin --- loopy/codegen/control.py | 2 ++ loopy/kernel/instruction.py | 4 +++- loopy/target/c/__init__.py | 16 ++++++++-------- loopy/type_inference.py | 3 +++ 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e3e209726..68d374ece 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -248,6 +248,8 @@ def build_loop_nest(codegen_state, schedule_index): if not codegen_state.ast_builder.can_implement_conditionals: result = [] inner = generate_code_for_sched_index(codegen_state, schedule_index) + print(inner) + 1/0 if inner is not None: result.append(inner) return merge_codegen_results(codegen_state, result) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index f6c5c2b13..b36bc39db 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1076,6 +1076,8 @@ class ArrayCallInstruction(MultiAssignmentBase): forced_iname_deps=None, forced_iname_deps_is_final=None): + within_inames = frozenset() + super(ArrayCallInstruction, self).__init__( id=id, depends_on=depends_on, @@ -1083,7 +1085,7 @@ class ArrayCallInstruction(MultiAssignmentBase): groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, - within_inames_is_final=within_inames_is_final, + within_inames_is_final=True, within_inames=within_inames, boostable=boostable, boostable_into=boostable_into, diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index dea103b9e..9ea829568 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -970,11 +970,13 @@ class CASTBuilder(ASTBuilderBase): func_id = "array_"+insn.expression.func_call.function.name parameters = insn.expression.func_call.parameters + assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) for a in insn.assignee_var_names()] - par_dtypes = tuple(ecm.infer_type(par) for par in parameters) + par_dtypes = tuple([ecm.infer_type(ass) for ass in insn.assignees] + + [ecm.infer_type(par) for par in parameters]) mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) if mangle_result is None: @@ -993,12 +995,13 @@ class CASTBuilder(ASTBuilderBase): ''' from loopy.expression import dtype_to_type_context + from pymbolic import var c_parameters = [ - ecm(par, PREC_NONE, + ecm(var("*"+par.aggregate.name), PREC_NONE, dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( - parameters, par_dtypes, mangle_result.arg_dtypes)] + insn.assignees+parameters, par_dtypes, mangle_result.arg_dtypes)] from loopy.codegen import SeenFunction codegen_state.seen_functions.add( @@ -1033,11 +1036,8 @@ class CASTBuilder(ASTBuilderBase): assignee_var_descriptors[0].dtype, result) - lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) - - from cgen import Assign - return Assign( - lhs_code, + from cgen import ExpressionStatement + return ExpressionStatement( CExpression(self.get_c_expression_to_code_mapper(), result)) # }}} diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 7a7c2b901..67e5dfe90 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -287,6 +287,9 @@ class TypeInferenceMapper(CombineMapper): % (identifier, len(arg_dtypes))) def map_variable(self, expr): + from pymbolic import var + if expr.name[0] == "*": + expr = var(expr.name[1:]) if expr.name in self.kernel.all_inames(): return [self.kernel.index_dtype] -- GitLab From 0451da0aee47c2ad4f83c2adb16c72482082af38 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Feb 2018 15:26:07 -0600 Subject: [PATCH 024/116] Made the last part a joke! --- loopy/target/c/codegen/expression.py | 2 ++ loopy/target/opencl.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 2be250121..e80c5bb1c 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -745,6 +745,8 @@ class CExpressionToCodeMapper(RecursiveMapper): "entry to loopy") def map_variable(self, expr, enclosing_prec): + if expr.name[0] == "*": + return expr.name[1:] return expr.name map_tagged_variable = map_variable diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 31e0569b9..0f4dd4ddf 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -281,6 +281,8 @@ def opencl_preamble_generator(preamble_info): kernel = preamble_info.kernel yield ("00_declare_gid_lid", remove_common_indentation(""" + #include "/home/kgk2/array_funcs.h" + #define lid(N) ((%(idx_ctype)s) get_local_id(N)) #define gid(N) ((%(idx_ctype)s) get_group_id(N)) """ % dict(idx_ctype=kernel.target.dtype_to_typename( -- GitLab From 7e5c0143e77c1d153b384cb490f9f016a69cfa8c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Feb 2018 17:29:25 -0600 Subject: [PATCH 025/116] Changed the way inames have been initiated --- loopy/kernel/instruction.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index b36bc39db..077f146a8 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1076,8 +1076,6 @@ class ArrayCallInstruction(MultiAssignmentBase): forced_iname_deps=None, forced_iname_deps_is_final=None): - within_inames = frozenset() - super(ArrayCallInstruction, self).__init__( id=id, depends_on=depends_on, @@ -1085,7 +1083,7 @@ class ArrayCallInstruction(MultiAssignmentBase): groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, - within_inames_is_final=True, + within_inames_is_final=within_inames_is_final, within_inames=within_inames, boostable=boostable, boostable_into=boostable_into, @@ -1119,6 +1117,10 @@ class ArrayCallInstruction(MultiAssignmentBase): self.assignees = assignees self.expression = expression + # FIXME: Currently assumes that all the assignees share the same inames + self.within_inames = frozenset(set(self.assignee_subscript_deps()[0]) - + set(iname.name for iname in expression.inames_set)) + if temp_var_types is None: self.temp_var_types = (None,) * len(self.assignees) else: -- GitLab From a94c5c366ec6022a28379c8c109f065023ad5113 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Feb 2018 17:32:57 -0600 Subject: [PATCH 026/116] Stricter rules on array calls. --- loopy/symbolic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 6350d47c1..0b64b67cd 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -652,8 +652,9 @@ class ArrayCall(p.Expression): assert isinstance(_inames, tuple) for iname in _inames: assert isinstance(iname, p.Variable) + for par in _func_call.parameters: + assert isinstance(par, p.Subscript) - # FIXME: Need to assert that the # }}} self.func_call = _func_call -- GitLab From 90750ed9aa9859aaace47a98baff39a1199c55b9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Feb 2018 17:51:35 -0600 Subject: [PATCH 027/116] Added functionality for making it into loops --- loopy/kernel/creation.py | 10 ++++++++++ loopy/kernel/instruction.py | 12 +++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index d0ee20e56..e419e52d0 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -365,6 +365,16 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): % opt_value) result["mem_kind"] = opt_value + elif opt_key == "use_dedicated_func": + opt_value = opt_value.lower().strip() + if opt_value is None: + opt_value = 'true' + if opt_value not in ['true', 'false']: + raise LoopyError("Unknown Array Call specified type %s specified" + " expected, 'true' or 'false'." + % opt_value) + result["use_dedicated_func"] = (opt_value=='true') + else: raise ValueError( "unrecognized instruction option '%s' " diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 077f146a8..653c76eb7 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1192,14 +1192,16 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): from loopy.symbolic import ArrayCall if isinstance(expression, ArrayCall): atomicity = kwargs.pop("atomicity", ()) + use_dedicated_func = kwargs.pop("use_dedicated_func", ()) if atomicity: raise LoopyError("atomic operations with more than one " "left-hand side not supported") - return ArrayCallInstruction( - assignees=assignees, - expression=expression, - temp_var_types=temp_var_types, - **kwargs) + if use_dedicated_func: + return ArrayCallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) return Assignment( assignee=assignees[0], -- GitLab From 727066127baf92459dd4b584c8231fbd42ffeec4 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 17 Feb 2018 14:41:15 -0600 Subject: [PATCH 028/116] [ci skip] Removed the functions file, due to wrong approach. --- loopy/target/opencl.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 0f4dd4ddf..31e0569b9 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -281,8 +281,6 @@ def opencl_preamble_generator(preamble_info): kernel = preamble_info.kernel yield ("00_declare_gid_lid", remove_common_indentation(""" - #include "/home/kgk2/array_funcs.h" - #define lid(N) ((%(idx_ctype)s) get_local_id(N)) #define gid(N) ((%(idx_ctype)s) get_group_id(N)) """ % dict(idx_ctype=kernel.target.dtype_to_typename( -- GitLab From 6e4b9d661a5970316da63b97a20237e5cf62fe42 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 19 Feb 2018 00:43:42 -0600 Subject: [PATCH 029/116] Added a new kernel parameter external_kernels --- loopy/kernel/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 038ef23ac..043451943 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -183,6 +183,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: target A subclass of :class:`loopy.TargetBase`. + + .. attribue:: external_kernels + + A list of loopy kernels, which is invoked as a kernel within the + current kernel. This is a way to facilitate in-kernel calls as function + calls in loopy. """ # {{{ constructor @@ -214,7 +220,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): state=kernel_state.INITIAL, target=None, - overridden_get_grid_sizes_for_insn_ids=None): + overridden_get_grid_sizes_for_insn_ids=None, + + external_kernels=[]): """ :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get intersected in slab decomposition, their grid sizes shouldn't @@ -289,6 +297,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): options=options, state=state, target=target, + external_kernels=external_kernels, overridden_get_grid_sizes_for_insn_ids=( overridden_get_grid_sizes_for_insn_ids)) -- GitLab From 041569de6de91d1633f0c7fa31aadaeb8e050772 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 19 Feb 2018 04:28:45 -0600 Subject: [PATCH 030/116] basic working of the kernel invocation is done. --- loopy/__init__.py | 3 + loopy/library/function.py | 20 ++++- loopy/target/c/__init__.py | 7 +- loopy/transform/register_knl.py | 125 ++++++++++++++++++++++++++++++++ 4 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 loopy/transform/register_knl.py diff --git a/loopy/__init__.py b/loopy/__init__.py index 92f5056fb..d6ab224bb 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -115,6 +115,7 @@ from loopy.transform.batch import to_batched from loopy.transform.parameter import assume, fix_parameters from loopy.transform.save import save_and_reload_temporaries from loopy.transform.add_barrier import add_barrier +from loopy.transform.register_knl import register_knl # }}} from loopy.type_inference import infer_unknown_types @@ -219,6 +220,8 @@ __all__ = [ "add_barrier", + "register_knl", + # }}} "get_dot_dependency_graph", diff --git a/loopy/library/function.py b/loopy/library/function.py index 634b10418..db0d483a8 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -26,8 +26,9 @@ THE SOFTWARE. def default_function_mangler(kernel, name, arg_dtypes): from loopy.library.reduction import reduction_function_mangler - manglers = [reduction_function_mangler, tuple_function_mangler, - array_call_function_mangler] + # danda + # removing array_call_function_mangler from the list + manglers = [reduction_function_mangler, tuple_function_mangler] for mangler in manglers: result = mangler(kernel, name, arg_dtypes) if result is not None: @@ -57,6 +58,18 @@ def tuple_function_mangler(kernel, name, arg_dtypes): return None +def kernel_call_function_mangler(kernel, name, arg_dtypes): + external_kernel_names = [ext_knl.name for ext_knl in + kernel.external_kernels] + if name in external_kernel_names: + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name=name, + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + +""" def array_call_function_mangler(kernel, name, arg_dtypes): if name[:5] == "array": from loopy.kernel.data import CallMangleInfo @@ -66,6 +79,5 @@ def array_call_function_mangler(kernel, name, arg_dtypes): arg_dtypes=arg_dtypes) return None - - +""" # vim: foldmethod=marker diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 9ea829568..caf88f27e 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -964,13 +964,14 @@ class CASTBuilder(ASTBuilderBase): def emit_array_call(self, codegen_state, insn): ecm = codegen_state.expression_to_code_mapper - from pymbolic.primitives import Variable from pymbolic.mapper.stringifier import PREC_NONE - func_id = "array_"+insn.expression.func_call.function.name + # yaha change kiya + # danda + # not adding the "array_" any more + func_id = insn.expression.func_call.function.name parameters = insn.expression.func_call.parameters - assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) for a in insn.assignee_var_names()] diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py new file mode 100644 index 000000000..01aa626ff --- /dev/null +++ b/loopy/transform/register_knl.py @@ -0,0 +1,125 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Andreas Sir" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from loopy.kernel import LoopKernel +from loopy.codegen import generate_code_v2 + +__doc__ = """ +.. currentmodule:: loopy + +.. autofunction:: register_knl +""" + +# {{{ Register Functions + + +def register_preamble_generators(kernel, preamble_generators): + """ + :arg manglers: list of functions of signature ``(preamble_info)`` + generating tuples ``(sortable_str_identifier, code)``, + where *preamble_info* is a :class:`PreambleInfo`. + :returns: *kernel* with *manglers* registered + """ + new_pgens = kernel.preamble_generators[:] + for pgen in preamble_generators: + if pgen not in new_pgens: + new_pgens.insert(0, pgen) + + return kernel.copy(preamble_generators=new_pgens) + + +def register_function_manglers(kernel, manglers): + """ + :arg manglers: list of functions of signature `(target, name, arg_dtypes)` + returning a :class:`loopy.CallMangleInfo`. + :returns: *kernel* with *manglers* registered + """ + new_manglers = kernel.function_manglers[:] + for m in manglers: + if m not in new_manglers: + new_manglers.insert(0, m) + + return kernel.copy(function_manglers=new_manglers) + +# }}} + + +# {{{ main entrypoint + +def register_knl(parent, child): + """ + The purpose of this transformation is so that one can inoke the child + kernel in the parent kernel. + + :arg parent + + This is the "main" kernel which will mostly remain unaltered and one + can interpret it as stitching up the child kernel in the parent kernel. + + :arg child + + This is like a function in every other language and this might be + invoked in one of the instructions of the parent kernel. + + ..note:: + + One should note that the kernels would go under stringent compatibilty + tests so that both of them can be confirmed to be made for each other. + """ + + # {{{ Sanity Checks + + assert isinstance(parent, LoopKernel) + assert isinstance(child, LoopKernel) + + # }}} + + child_func = generate_code_v2(child).device_programs[0] + child_body = str(child_func.ast) + + def child_mangler(kernel, name, arg_dtypes): + if name == child_func.name: + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name=name, + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + return None + + def child_preamble_generator(preamble_info): + for func in preamble_info.seen_functions: + if child_func.name == func.name: + yield("00_enable_child", + child_body) + break + + new_parent = register_preamble_generators(parent, + [child_preamble_generator]) + new_parent = register_function_manglers(new_parent, [child_mangler]) + + return new_parent +# }}} + +# vim: foldmethod=marker -- GitLab From 672f4ac9cac8de356ab05e6b1475e8c8ce5c019c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 20 Feb 2018 02:19:17 -0600 Subject: [PATCH 031/116] Removed the use_dedicated_func. --- loopy/kernel/instruction.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 653c76eb7..077f146a8 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1192,16 +1192,14 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): from loopy.symbolic import ArrayCall if isinstance(expression, ArrayCall): atomicity = kwargs.pop("atomicity", ()) - use_dedicated_func = kwargs.pop("use_dedicated_func", ()) if atomicity: raise LoopyError("atomic operations with more than one " "left-hand side not supported") - if use_dedicated_func: - return ArrayCallInstruction( - assignees=assignees, - expression=expression, - temp_var_types=temp_var_types, - **kwargs) + return ArrayCallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) return Assignment( assignee=assignees[0], -- GitLab From 74e7d54ecce9da334a598c0e3c970539447508a4 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 20 Feb 2018 14:07:39 -0600 Subject: [PATCH 032/116] Added a pointer variable --- loopy/symbolic.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 0b64b67cd..c84101039 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -110,6 +110,9 @@ class IdentityMapperMixin(object): return ArrayCall(expr.func_call, expr.inames) + def map_pointer_variable(self, expr, *args): + return PointerVariable(expr.name) + map_type_cast = map_type_annotation @@ -174,6 +177,12 @@ class WalkMapper(WalkMapperBase): return self.map_call(expr.func_call, *args) + def map_pointer_variable(self, expr, *args): + if not self.visit(expr): + return + + return self.post_visit(expr) + class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant @@ -253,6 +262,10 @@ class StringifyMapper(StringifyMapperBase): inames=expr.inames, rhs=expr.func_call.parameters) + def map_pointer_variable(self, expr, prec): + return "PointerVariable({name})" % ( + expr.name) + class UnidirectionalUnifier(UnidirectionalUnifierBase): def map_reduction(self, expr, other, unis): @@ -310,6 +323,10 @@ class DependencyMapper(DependencyMapperBase): def map_array_call(self, expr, *args): return self.map_call(expr.func_call) + def map_pointer_variable(self, expr, *args): + raise NotImplementedError("Dependency Mapper needs to be implemented" + "for pointer variable") + map_linear_subscript = DependencyMapperBase.map_subscript def map_type_cast(self, expr): @@ -721,6 +738,24 @@ class RuleArgument(p.Expression): mapper_method = intern("map_rule_argument") +class PointerVariable(p.Expression): + """ This is used to give out the pointer support so that they could be + implemented in function calls for arrays. + """ + + init_arg_name = ("name") + + def __init__(self, name): + self.name = name + + def __getinitargs__(self): + return (self.name, ) + + def stringifier(self): + return StringifyMapper + + mapper_method = intern("map_pointer_variable") + # }}} -- GitLab From c58e40d0cf44b2e86da54e707a84dd73f5761079 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 20 Feb 2018 14:31:41 -0600 Subject: [PATCH 033/116] Removed the starry business(:P) and switched to PointerVariable --- loopy/target/c/__init__.py | 14 +++---- loopy/target/c/codegen/expression.py | 5 ++- loopy/transform/register_knl.py | 55 +++++++++++++++++++++++++++- loopy/type_inference.py | 6 ++- 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index caf88f27e..fc902ed1a 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -738,7 +738,6 @@ class CASTBuilder(ASTBuilderBase): def get_global_arg_decl(self, name, shape, dtype, is_written): from cgen import RestrictPointer, Const - arg_decl = RestrictPointer(POD(self, dtype, name)) if not is_written: @@ -965,10 +964,8 @@ class CASTBuilder(ASTBuilderBase): ecm = codegen_state.expression_to_code_mapper from pymbolic.mapper.stringifier import PREC_NONE + from loopy.symbolic import PointerVariable as pvar - # yaha change kiya - # danda - # not adding the "array_" any more func_id = insn.expression.func_call.function.name parameters = insn.expression.func_call.parameters @@ -976,8 +973,8 @@ class CASTBuilder(ASTBuilderBase): codegen_state.kernel.get_var_descriptor(a) for a in insn.assignee_var_names()] - par_dtypes = tuple([ecm.infer_type(ass) for ass in insn.assignees] - + [ecm.infer_type(par) for par in parameters]) + par_dtypes = tuple([ecm.infer_type(par) for par in parameters] + + [ecm.infer_type(ass) for ass in insn.assignees]) mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) if mangle_result is None: @@ -996,13 +993,12 @@ class CASTBuilder(ASTBuilderBase): ''' from loopy.expression import dtype_to_type_context - from pymbolic import var c_parameters = [ - ecm(var("*"+par.aggregate.name), PREC_NONE, + ecm(pvar(par.aggregate.name), PREC_NONE, dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( - insn.assignees+parameters, par_dtypes, mangle_result.arg_dtypes)] + parameters+insn.assignees, par_dtypes, mangle_result.arg_dtypes)] from loopy.codegen import SeenFunction codegen_state.seen_functions.add( diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index e80c5bb1c..e700081c2 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -165,6 +165,9 @@ class ExpressionToCExpressionMapper(IdentityMapper): def map_tagged_variable(self, expr, type_context): return var(expr.name) + def map_pointer_variable(self, expr, type_context): + return var(expr.name) + def map_subscript(self, expr, type_context): def base_impl(expr, type_context): return self.rec(expr.aggregate, type_context)[self.rec(expr.index, 'i')] @@ -745,8 +748,6 @@ class CExpressionToCodeMapper(RecursiveMapper): "entry to loopy") def map_variable(self, expr, enclosing_prec): - if expr.name[0] == "*": - return expr.name[1:] return expr.name map_tagged_variable = map_variable diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 01aa626ff..cd3147792 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -24,6 +24,9 @@ THE SOFTWARE. from loopy.kernel import LoopKernel from loopy.codegen import generate_code_v2 +from loopy.kernel.instruction import(ArrayCallInstruction, + Assignment, NoOpInstruction, BarrierInstruction, Assignment, + CallInstruction, _DataObliviousInstruction) __doc__ = """ .. currentmodule:: loopy @@ -98,6 +101,8 @@ def register_knl(parent, child): child_func = generate_code_v2(child).device_programs[0] child_body = str(child_func.ast) + # {{{ Child kernel related function mangler and preamble generator + def child_mangler(kernel, name, arg_dtypes): if name == child_func.name: from loopy.kernel.data import CallMangleInfo @@ -111,9 +116,57 @@ def register_knl(parent, child): def child_preamble_generator(preamble_info): for func in preamble_info.seen_functions: if child_func.name == func.name: - yield("00_enable_child", + yield("01_enable_child", child_body) break + # }}} + + + # {{{ Getting the instruction which have this function. + + # these are the instructions which have the child kernel called thorugh the + # ArrayCallInstruction + insns = [] + + for insn in parent.instructions: + if isinstance(insn, ArrayCallInstruction): + if insn.expression.func_call.function.name == child_func.name: + # only collecting the array instructions that are needed by the + # connection + insns.append(insn) + elif isinstance(insn, (Assignment, NoOpInstruction, + BarrierInstruction, Assignment, CallInstruction, + _DataObliviousInstruction)): + pass + else: + raise NotImplementedError("register_knl not made for %s type of" + "instruciton" % (str(type(insn)))) + + # }}} + + # {{{ Identifying the output and input. + + # }}} + + # {{{ Type Checking + + # no idea of how do I do this. + # This is not our work to do. The manglers should take care of this. + + # }}} + + # {{{ Domain checking + + # need to assert the lengths of the intervals are same for the caller and + # callee + + # I think if we just ensure that the product of the 2 match, then we would + # be good to go.-(nope!) + + # But then what if this is just a kernel for library call, there we won't have any + # iname usage. How do we check? + + # }}} new_parent = register_preamble_generators(parent, [child_preamble_generator]) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 67e5dfe90..a41b08fbe 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -288,8 +288,6 @@ class TypeInferenceMapper(CombineMapper): def map_variable(self, expr): from pymbolic import var - if expr.name[0] == "*": - expr = var(expr.name[1:]) if expr.name in self.kernel.all_inames(): return [self.kernel.index_dtype] @@ -405,6 +403,10 @@ class TypeInferenceMapper(CombineMapper): def map_array_call(self, expr, return_tuple=False): return self.map_call(expr.func_call, return_tuple) + def map_pointer_variable(self, expr): + from pymbolic import var + return self.map_variable(var(expr.name)) + # }}} -- GitLab From 0f012a53582308213e3be052bea6d6c8c2634f58 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 20 Feb 2018 21:29:11 -0600 Subject: [PATCH 034/116] Almost got it working, need. --- loopy/__init__.py | 4 +-- loopy/symbolic.py | 43 ++++++++++++++++------------ loopy/target/c/__init__.py | 7 +++-- loopy/target/c/codegen/expression.py | 4 +-- loopy/transform/register_knl.py | 11 +++++-- loopy/type_inference.py | 5 ++-- 6 files changed, 43 insertions(+), 31 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index d6ab224bb..48f615c1b 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -115,7 +115,7 @@ from loopy.transform.batch import to_batched from loopy.transform.parameter import assume, fix_parameters from loopy.transform.save import save_and_reload_temporaries from loopy.transform.add_barrier import add_barrier -from loopy.transform.register_knl import register_knl +from loopy.transform.register_knl import register_callable_kernel # }}} from loopy.type_inference import infer_unknown_types @@ -220,7 +220,7 @@ __all__ = [ "add_barrier", - "register_knl", + "register_callable_kernel", # }}} diff --git a/loopy/symbolic.py b/loopy/symbolic.py index c84101039..f786695e0 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -110,8 +110,6 @@ class IdentityMapperMixin(object): return ArrayCall(expr.func_call, expr.inames) - def map_pointer_variable(self, expr, *args): - return PointerVariable(expr.name) map_type_cast = map_type_annotation @@ -177,12 +175,6 @@ class WalkMapper(WalkMapperBase): return self.map_call(expr.func_call, *args) - def map_pointer_variable(self, expr, *args): - if not self.visit(expr): - return - - return self.post_visit(expr) - class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant @@ -262,10 +254,6 @@ class StringifyMapper(StringifyMapperBase): inames=expr.inames, rhs=expr.func_call.parameters) - def map_pointer_variable(self, expr, prec): - return "PointerVariable({name})" % ( - expr.name) - class UnidirectionalUnifier(UnidirectionalUnifierBase): def map_reduction(self, expr, other, unis): @@ -323,7 +311,7 @@ class DependencyMapper(DependencyMapperBase): def map_array_call(self, expr, *args): return self.map_call(expr.func_call) - def map_pointer_variable(self, expr, *args): + def map_pointered_subscript(self, expr, *args): raise NotImplementedError("Dependency Mapper needs to be implemented" "for pointer variable") @@ -691,6 +679,22 @@ class ArrayCall(p.Expression): def stringifier(self): return StringifyMapper + def get_params_as_starting_subs(self): + result = [] + for par in self.func_call.parameters: + starting_inames = [] + seen_subscripted_iname = False + for iname in par.index_tuple: + if iname in self.inames: + seen_subscripted_iname = True + if seen_subscripted_iname: + starting_inames.append(parse('0')) + else: + starting_inames.append(iname) + result.append( + p.Subscript(par.aggregate, tuple(starting_inames))) + return tuple(result) + @property @memoize_method def inames_set(self): @@ -738,23 +742,24 @@ class RuleArgument(p.Expression): mapper_method = intern("map_rule_argument") -class PointerVariable(p.Expression): +class PointeredSubscript(p.Expression): """ This is used to give out the pointer support so that they could be implemented in function calls for arrays. """ - init_arg_name = ("name") + init_arg_name = ("subscript") - def __init__(self, name): - self.name = name + def __init__(self, subscript): + self.subscript = subscript + assert isinstance(subscript, p.Subscript) def __getinitargs__(self): - return (self.name, ) + return (self.expr) def stringifier(self): return StringifyMapper - mapper_method = intern("map_pointer_variable") + mapper_method = intern("map_pointered_subscript") # }}} diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index fc902ed1a..524f55048 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -964,10 +964,10 @@ class CASTBuilder(ASTBuilderBase): ecm = codegen_state.expression_to_code_mapper from pymbolic.mapper.stringifier import PREC_NONE - from loopy.symbolic import PointerVariable as pvar + from loopy.symbolic import PointeredSubscript as pvar func_id = insn.expression.func_call.function.name - parameters = insn.expression.func_call.parameters + parameters = insn.expression.get_params_as_starting_subs() assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) @@ -993,8 +993,9 @@ class CASTBuilder(ASTBuilderBase): ''' from loopy.expression import dtype_to_type_context + c_parameters = [ - ecm(pvar(par.aggregate.name), PREC_NONE, + ecm(pvar(par), PREC_NONE, dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index e700081c2..ab0d66783 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -165,8 +165,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): def map_tagged_variable(self, expr, type_context): return var(expr.name) - def map_pointer_variable(self, expr, type_context): - return var(expr.name) + def map_pointered_subscript(self, expr, type_context): + return var("&")(self.map_subscript(expr.subscript, type_context)) def map_subscript(self, expr, type_context): def base_impl(expr, type_context): diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index cd3147792..98298c0de 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -31,7 +31,7 @@ from loopy.kernel.instruction import(ArrayCallInstruction, __doc__ = """ .. currentmodule:: loopy -.. autofunction:: register_knl +.. autofunction:: register_callable_kernel """ # {{{ Register Functions @@ -70,7 +70,7 @@ def register_function_manglers(kernel, manglers): # {{{ main entrypoint -def register_knl(parent, child): +def register_callable_kernel(parent, func_name, child): """ The purpose of this transformation is so that one can inoke the child kernel in the parent kernel. @@ -80,6 +80,11 @@ def register_knl(parent, child): This is the "main" kernel which will mostly remain unaltered and one can interpret it as stitching up the child kernel in the parent kernel. + :arg func_name + + The name of the function call with which the child kernel must be + associated in the parent kernel + :arg child This is like a function in every other language and this might be @@ -98,6 +103,8 @@ def register_knl(parent, child): # }}} + child = child.copy(name=func_name) + child_func = generate_code_v2(child).device_programs[0] child_body = str(child_func.ast) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index a41b08fbe..e55648aaa 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -403,9 +403,8 @@ class TypeInferenceMapper(CombineMapper): def map_array_call(self, expr, return_tuple=False): return self.map_call(expr.func_call, return_tuple) - def map_pointer_variable(self, expr): - from pymbolic import var - return self.map_variable(var(expr.name)) + def map_pointered_subscript(self, expr): + return self.rec(expr.subscript.aggregate) # }}} -- GitLab From 4dcf4b9294453268b732c983e913a347065451d3 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 21 Feb 2018 08:55:54 -0600 Subject: [PATCH 035/116] Works fine for a single parameter. Handles the outer and innner inames appropriately --- loopy/kernel/instruction.py | 21 +++++++++++ loopy/symbolic.py | 3 -- loopy/target/c/__init__.py | 15 +++++++- loopy/transform/register_knl.py | 63 +++++++++++++++++++++------------ 4 files changed, 75 insertions(+), 27 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 077f146a8..e08517065 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1147,6 +1147,27 @@ class ArrayCallInstruction(MultiAssignmentBase): # }}} + def assignee_and_parameters_stride(self): + assignee_stride = [] + param_stride = [] + inner_inames = self.expression.inames + assignee_iname_dict = {} + param_iname_dict = {} + for i, iname in enumerate(self.assignees[0].index_tuple): + assignee_iname_dict[iname] = i + + for i, iname in enumerate(self.expression.func_call.parameters[0].index_tuple): + param_iname_dict[iname] = i + + for iname in inner_inames: + if iname in assignee_iname_dict: + param_stride.append(param_iname_dict[iname]) + if iname in param_iname_dict: + assignee_stride.append(assignee_iname_dict[iname]) + + return assignee_stride, param_stride + + def __str__(self): result = "%s: %s <- %s" % (self.id, ", ".join(str(a) for a in self.assignees), diff --git a/loopy/symbolic.py b/loopy/symbolic.py index f786695e0..a1a5517b7 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -683,11 +683,8 @@ class ArrayCall(p.Expression): result = [] for par in self.func_call.parameters: starting_inames = [] - seen_subscripted_iname = False for iname in par.index_tuple: if iname in self.inames: - seen_subscripted_iname = True - if seen_subscripted_iname: starting_inames.append(parse('0')) else: starting_inames.append(iname) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 524f55048..884d64614 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -964,10 +964,23 @@ class CASTBuilder(ASTBuilderBase): ecm = codegen_state.expression_to_code_mapper from pymbolic.mapper.stringifier import PREC_NONE + from pymbolic.primitives import Subscript from loopy.symbolic import PointeredSubscript as pvar + from loopy.symbolic import parse func_id = insn.expression.func_call.function.name parameters = insn.expression.get_params_as_starting_subs() + assignments = [] + for ass in insn.assignees: + starting_inames = [] + for iname in ass.index_tuple: + if iname in insn.expression.inames: + starting_inames.append(parse('0')) + else: + starting_inames.append(iname) + assignments.append( + Subscript(ass.aggregate, tuple(starting_inames))) + assignments = tuple(assignments) assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) @@ -999,7 +1012,7 @@ class CASTBuilder(ASTBuilderBase): dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( - parameters+insn.assignees, par_dtypes, mangle_result.arg_dtypes)] + parameters+assignments, par_dtypes, mangle_result.arg_dtypes)] from loopy.codegen import SeenFunction codegen_state.seen_functions.add( diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 98298c0de..7bd61de43 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -28,6 +28,9 @@ from loopy.kernel.instruction import(ArrayCallInstruction, Assignment, NoOpInstruction, BarrierInstruction, Assignment, CallInstruction, _DataObliviousInstruction) +from loopy.kernel.array import FixedStrideArrayDimTag as DimTag +from loopy.kernel.data import auto + __doc__ = """ .. currentmodule:: loopy @@ -103,6 +106,43 @@ def register_callable_kernel(parent, func_name, child): # }}} + # {{{ Getting the instruction which have this function. + + # these are the instructions which have the child kernel called thorugh the + # ArrayCallInstruction + insns = [] + + for insn in parent.instructions: + if isinstance(insn, ArrayCallInstruction): + if insn.expression.func_call.function.name == func_name: + # only collecting the array instructions that are needed by the + # connection + insns.append(insn) + parent_par = insn.expression.func_call.parameters[0].aggregate.name + parent_ass = insn.assignees[0].aggregate.name + ass_stride, par_stride = insn.assignee_and_parameters_stride() + elif isinstance(insn, (Assignment, NoOpInstruction, + BarrierInstruction, Assignment, CallInstruction, + _DataObliviousInstruction)): + pass + else: + raise NotImplementedError("register_knl not made for %s type of" + "instruciton" % (str(type(insn)))) + + new_args = child.args.copy() + param_dim_tags = [] + assignee_dim_tags = [] + for i, index in enumerate(par_stride): + param_dim_tags.append(DimTag(parent.arg_dict[parent_par].dim_tags[index].stride)) + assignee_dim_tags.append(DimTag(parent.arg_dict[parent_ass].dim_tags[index].stride)) + + new_args[0] = new_args[0].copy(dim_tags=(param_dim_tags)) + new_args[1] = new_args[1].copy(dim_tags=(assignee_dim_tags)) + + child = child.copy(args=new_args) + + # }}} + child = child.copy(name=func_name) child_func = generate_code_v2(child).device_programs[0] @@ -128,29 +168,6 @@ def register_callable_kernel(parent, func_name, child): break # }}} - - # {{{ Getting the instruction which have this function. - - # these are the instructions which have the child kernel called thorugh the - # ArrayCallInstruction - insns = [] - - for insn in parent.instructions: - if isinstance(insn, ArrayCallInstruction): - if insn.expression.func_call.function.name == child_func.name: - # only collecting the array instructions that are needed by the - # connection - insns.append(insn) - elif isinstance(insn, (Assignment, NoOpInstruction, - BarrierInstruction, Assignment, CallInstruction, - _DataObliviousInstruction)): - pass - else: - raise NotImplementedError("register_knl not made for %s type of" - "instruciton" % (str(type(insn)))) - - # }}} - # {{{ Identifying the output and input. # }}} -- GitLab From bd6741062d2b62f9f162d3bec9a7591ef53a426e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 23 Feb 2018 16:05:57 -0600 Subject: [PATCH 036/116] Reverted the changes for the function manglers --- loopy/library/function.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/loopy/library/function.py b/loopy/library/function.py index db0d483a8..d0706df8d 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -26,8 +26,6 @@ THE SOFTWARE. def default_function_mangler(kernel, name, arg_dtypes): from loopy.library.reduction import reduction_function_mangler - # danda - # removing array_call_function_mangler from the list manglers = [reduction_function_mangler, tuple_function_mangler] for mangler in manglers: result = mangler(kernel, name, arg_dtypes) @@ -57,27 +55,4 @@ def tuple_function_mangler(kernel, name, arg_dtypes): return None - -def kernel_call_function_mangler(kernel, name, arg_dtypes): - external_kernel_names = [ext_knl.name for ext_knl in - kernel.external_kernels] - if name in external_kernel_names: - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - -""" -def array_call_function_mangler(kernel, name, arg_dtypes): - if name[:5] == "array": - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - return None -""" # vim: foldmethod=marker -- GitLab From 33e1c446458a7263213869dbad44ea18f29cf499 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 15:32:24 -0600 Subject: [PATCH 037/116] Support for multiple parameters nfor stride calculation --- loopy/kernel/instruction.py | 38 +++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index e08517065..0e06c5bb1 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1148,24 +1148,34 @@ class ArrayCallInstruction(MultiAssignmentBase): # }}} def assignee_and_parameters_stride(self): - assignee_stride = [] - param_stride = [] + assignees_stride = [] + params_stride = [] inner_inames = self.expression.inames - assignee_iname_dict = {} - param_iname_dict = {} - for i, iname in enumerate(self.assignees[0].index_tuple): - assignee_iname_dict[iname] = i - for i, iname in enumerate(self.expression.func_call.parameters[0].index_tuple): - param_iname_dict[iname] = i + for assignee in self.assignees: + assignee_iname_dict = {} + assignee_stride = [] - for iname in inner_inames: - if iname in assignee_iname_dict: - param_stride.append(param_iname_dict[iname]) - if iname in param_iname_dict: - assignee_stride.append(assignee_iname_dict[iname]) + for i, iname in enumerate(assignee.index_tuple): + assignee_iname_dict[iname] = i + for iname in inner_inames: + if iname in assignee_iname_dict: + assignee_stride.append(assignee_iname_dict[iname]) + assignees_stride.append(assignee_stride) - return assignee_stride, param_stride + for par in self.expression.func_call.parameters: + param_iname_dict = {} + param_stride = [] + + for i, iname in enumerate(par.index_tuple): + param_iname_dict[iname] = i + + for iname in inner_inames: + if iname in param_iname_dict: + param_stride.append(param_iname_dict[iname]) + params_stride.append(param_stride) + + return assignees_stride, params_stride def __str__(self): -- GitLab From fe5c8a6e513e2f287bf21fad930fba7a1688290d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 18:03:15 -0600 Subject: [PATCH 038/116] Support for multiple inputs --- loopy/kernel/instruction.py | 2 +- loopy/transform/register_knl.py | 167 ++++++++++++++++++-------------- 2 files changed, 97 insertions(+), 72 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 0e06c5bb1..a2a601e5a 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1147,7 +1147,7 @@ class ArrayCallInstruction(MultiAssignmentBase): # }}} - def assignee_and_parameters_stride(self): + def assignee_and_parameters_iname_position(self): assignees_stride = [] params_stride = [] inner_inames = self.expression.inames diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 7bd61de43..317049e08 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -24,12 +24,11 @@ THE SOFTWARE. from loopy.kernel import LoopKernel from loopy.codegen import generate_code_v2 -from loopy.kernel.instruction import(ArrayCallInstruction, - Assignment, NoOpInstruction, BarrierInstruction, Assignment, - CallInstruction, _DataObliviousInstruction) +from loopy.kernel.instruction import (ArrayCallInstruction, Assignment, + NoOpInstruction, BarrierInstruction, CallInstruction, + _DataObliviousInstruction) from loopy.kernel.array import FixedStrideArrayDimTag as DimTag -from loopy.kernel.data import auto __doc__ = """ .. currentmodule:: loopy @@ -70,9 +69,91 @@ def register_function_manglers(kernel, manglers): # }}} +# {{{ Generalized function mangler and preamble generator + + +def get_mangler_and_preamble(child_func_name, child_body): + + def child_mangler(kernel, name, arg_dtypes): + if name == child_func_name: + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name=name, + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + return None + + def child_preamble_generator(preamble_info): + for func in preamble_info.seen_functions: + if child_func_name == func.name: + yield("01_enable_child", + child_body) + break + + return child_mangler, child_preamble_generator + +# }}} + +# {{{ Changing the argument strides + + +def get_child_body(insn, child_knl, parent_knl): + child_knl = child_knl.copy(name=insn.expression.func_call.function.name) + parent_params = [] + parent_assignees = [] + + for par in insn.expression.func_call.parameters: + parent_params.append(par.aggregate.name) + for ass in insn.assignees: + parent_assignees.append(ass.aggregate.name) + + # {{{ creating the parent to child parameter association dictionary + + parent_arg_to_child = {} + for child_par, parent_par in zip(child_knl.args, + insn.expression.func_call.parameters + insn.assignees): + parent_arg_to_child[parent_par.aggregate.name] = child_par.name # noqa + + # }}} + + assignees_stride, params_stride = insn.assignee_and_parameters_iname_position() # noqa + + new_strides_dim_tag_dict = {} + + for par, par_stride in zip(parent_params, params_stride): + param_dim_tags = [] + for i, index in enumerate(par_stride): + param_dim_tags.append(DimTag( + parent_knl.arg_dict[par].dim_tags[index].stride)) + new_strides_dim_tag_dict[parent_arg_to_child[par]] = ( + param_dim_tags) + + for assignee, assignee_stride in zip(parent_assignees, assignees_stride): + assignee_dim_tags = [] + for i, index in enumerate(assignee_stride): + assignee_dim_tags.append(DimTag( + parent_knl.arg_dict[assignee].dim_tags[index].stride)) + new_strides_dim_tag_dict[parent_arg_to_child[assignee]] = ( + assignee_dim_tags) + + new_args = [] + for arg in child_knl.args: + new_args.append( + arg.copy(dim_tags=(new_strides_dim_tag_dict[arg.name]))) + + child_knl = child_knl.copy(args=new_args) + child_func = generate_code_v2(child_knl).device_programs[0] + # FIXME: Need to change the function name accorsing to the instruction id + # FIXME: Need to remove the __kernel and reequired work group alignemnt + child_body = str(child_func.ast) + + return child_body +# }}} # {{{ main entrypoint + def register_callable_kernel(parent, func_name, child): """ The purpose of this transformation is so that one can inoke the child @@ -108,8 +189,6 @@ def register_callable_kernel(parent, func_name, child): # {{{ Getting the instruction which have this function. - # these are the instructions which have the child kernel called thorugh the - # ArrayCallInstruction insns = [] for insn in parent.instructions: @@ -118,9 +197,6 @@ def register_callable_kernel(parent, func_name, child): # only collecting the array instructions that are needed by the # connection insns.append(insn) - parent_par = insn.expression.func_call.parameters[0].aggregate.name - parent_ass = insn.assignees[0].aggregate.name - ass_stride, par_stride = insn.assignee_and_parameters_stride() elif isinstance(insn, (Assignment, NoOpInstruction, BarrierInstruction, Assignment, CallInstruction, _DataObliviousInstruction)): @@ -129,74 +205,23 @@ def register_callable_kernel(parent, func_name, child): raise NotImplementedError("register_knl not made for %s type of" "instruciton" % (str(type(insn)))) - new_args = child.args.copy() - param_dim_tags = [] - assignee_dim_tags = [] - for i, index in enumerate(par_stride): - param_dim_tags.append(DimTag(parent.arg_dict[parent_par].dim_tags[index].stride)) - assignee_dim_tags.append(DimTag(parent.arg_dict[parent_ass].dim_tags[index].stride)) - - new_args[0] = new_args[0].copy(dim_tags=(param_dim_tags)) - new_args[1] = new_args[1].copy(dim_tags=(assignee_dim_tags)) - - child = child.copy(args=new_args) - - # }}} - - child = child.copy(name=func_name) - - child_func = generate_code_v2(child).device_programs[0] - child_body = str(child_func.ast) - - # {{{ Child kernel related function mangler and preamble generator - - def child_mangler(kernel, name, arg_dtypes): - if name == child_func.name: - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - return None - - def child_preamble_generator(preamble_info): - for func in preamble_info.seen_functions: - if child_func.name == func.name: - yield("01_enable_child", - child_body) - break # }}} - # {{{ Identifying the output and input. - + # {{{ collecting the manglers and preambles for each instruction + manglers = [] + preambles = [] + for insn in insns: + mangler, preamble = get_mangler_and_preamble(func_name, + get_child_body(insn, child, parent)) + manglers.append(mangler) + preambles.append(preamble) # }}} - # {{{ Type Checking - - # no idea of how do I do this. - # This is not our work to do. The manglers should take care of this. - - # }}} - - # {{{ Domain checking - - # need to assert the lengths of the intervals are same for the caller and - # callee - - # I think if we just ensure that the product of the 2 match, then we would - # be good to go.-(nope!) - - # But then what if this is just a kernel for library call, there we won't have any - # iname usage. How do we check? - - # }}} - - new_parent = register_preamble_generators(parent, - [child_preamble_generator]) - new_parent = register_function_manglers(new_parent, [child_mangler]) + new_parent = register_preamble_generators(parent, preambles) + new_parent = register_function_manglers(new_parent, manglers) return new_parent + # }}} # vim: foldmethod=marker -- GitLab From 2c18c8a76870072b6f4da926ec998f7a8c91f7dc Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 18:18:25 -0600 Subject: [PATCH 039/116] Changed the name of the function --- loopy/target/c/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 884d64614..e2aaaded8 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -965,10 +965,10 @@ class CASTBuilder(ASTBuilderBase): from pymbolic.mapper.stringifier import PREC_NONE from pymbolic.primitives import Subscript - from loopy.symbolic import PointeredSubscript as pvar + from loopy.symbolic import PointeredSubscript as Pvar from loopy.symbolic import parse - func_id = insn.expression.func_call.function.name + func_id = insn.expression.func_call.function.name + "_" + str(insn.id) parameters = insn.expression.get_params_as_starting_subs() assignments = [] for ass in insn.assignees: @@ -1008,7 +1008,7 @@ class CASTBuilder(ASTBuilderBase): from loopy.expression import dtype_to_type_context c_parameters = [ - ecm(pvar(par), PREC_NONE, + ecm(Pvar(par), PREC_NONE, dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( -- GitLab From 2397b50b3cbe1463ef34ed4826b1a187ca133f10 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 18:19:06 -0600 Subject: [PATCH 040/116] Made changes so that we could register the name of the kernel appended with the instruction id --- loopy/transform/register_knl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 317049e08..1d6b9641f 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -99,7 +99,7 @@ def get_mangler_and_preamble(child_func_name, child_body): def get_child_body(insn, child_knl, parent_knl): - child_knl = child_knl.copy(name=insn.expression.func_call.function.name) + child_knl = child_knl.copy(name=insn.expression.func_call.function.name + "_" + insn.id) parent_params = [] parent_assignees = [] @@ -211,7 +211,7 @@ def register_callable_kernel(parent, func_name, child): manglers = [] preambles = [] for insn in insns: - mangler, preamble = get_mangler_and_preamble(func_name, + mangler, preamble = get_mangler_and_preamble(func_name+"_"+str(insn.id), get_child_body(insn, child, parent)) manglers.append(mangler) preambles.append(preamble) -- GitLab From 7e35af8e83783378a8c36ce50789962e9bf7bd85 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 18:29:06 -0600 Subject: [PATCH 041/116] Got the multi-parameters working --- loopy/library/array_call.py | 110 ------------------------------------ 1 file changed, 110 deletions(-) delete mode 100644 loopy/library/array_call.py diff --git a/loopy/library/array_call.py b/loopy/library/array_call.py deleted file mode 100644 index c7f6b0539..000000000 --- a/loopy/library/array_call.py +++ /dev/null @@ -1,110 +0,0 @@ -from __future__ import division - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - - -from pymbolic.primitives import Expression - -from loopy.diagnostic import LoopyError - - -class ArrayCallOperation(object): - """Subclasses of this type have to be hashable, picklable, and - equality-comparable. - """ - - def __init__(self, _func, _inames, _rhs): - - # {{{ Sanity Check - assert isinstance(_inames, tuple) - assert isinstance(_rhs, Expression) - # }}} - - self.func = _func - self.rhs = _rhs - self.inames = _inames - - def result_dtypes(self, target, *arg_dtypes): - """ - :arg arg_dtypes: may be None if not known - :returns: None if not known, otherwise the returned type - """ - - raise NotImplementedError - - @property - def arg_count(self): - raise NotImplementedError - - def __hash__(self): - # Force subclasses to override - raise NotImplementedError - - def __eq__(self, other): - # Force subclasses to override - raise NotImplementedError - - def __call__(self, dtype, operand1, operand2): - raise NotImplementedError - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - result = "{ArrayCall(({inames}), {rhs}}".format( - inames=",".join(self.inames), - rhs=str(self.rhs)) - - return result - -# {{{ Array Call Mangler - - -def array_call_mangler(kernel, func_name, arg_dtypes): - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): - raise LoopyError("%s: only C-like targets supported for now" % ( - func_name)) - - op = func_name - scalar_dtype = arg_dtypes[0] - index_dtype = arg_dtypes[1] - - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="%s_op" % op.prefix( - scalar_dtype, index_dtype), - result_dtypes=op.result_dtypes( - kernel, scalar_dtype, index_dtype), - arg_dtypes=( - scalar_dtype, - index_dtype, - scalar_dtype, - index_dtype), - ) - - return None - -# }}} - -# vim: fdm=marker -- GitLab From 223acfdcb9086f63f8794119d45e5067a617cc5e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 21:36:13 -0600 Subject: [PATCH 042/116] Added Test --- .pytest_cache/v/cache/lastfailed | 1 + loopy/transform/register_knl.py | 1 - test/test_transform.py | 61 ++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 .pytest_cache/v/cache/lastfailed diff --git a/.pytest_cache/v/cache/lastfailed b/.pytest_cache/v/cache/lastfailed new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/.pytest_cache/v/cache/lastfailed @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 1d6b9641f..36447d1e1 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -144,7 +144,6 @@ def get_child_body(insn, child_knl, parent_knl): child_knl = child_knl.copy(args=new_args) child_func = generate_code_v2(child_knl).device_programs[0] - # FIXME: Need to change the function name accorsing to the instruction id # FIXME: Need to remove the __kernel and reequired work group alignemnt child_body = str(child_func.ast) diff --git a/test/test_transform.py b/test/test_transform.py index 0e10db362..c13b5f53a 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -178,6 +178,67 @@ def test_add_barrier(ctx_factory): assert (np.linalg.norm(out-2*a.T) < 1e-16) +def test_register_knl(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + n = 2 ** 4 + x_dev = cl.clrandom.rand(queue, (n, n, n, n, n), np.float64) + y_dev = cl.clrandom.rand(queue, (n, n, n, n, n), np.float64) + z_dev = cl.clrandom.rand(queue, (n, n, n, n, n), np.float64) + + child_knl = lp.make_kernel( + "{[i, j]:0<=i, j < 16}", + """ + c[i, j] = 2*a[i, j] + 3*b[i, j] + """, + kernel_data=[ + lp.GlobalArg( + name='a', + dtype=np.float64, + shape=lp.auto), + lp.GlobalArg( + name='b', + dtype=np.float64, + shape=lp.auto), + lp.GlobalArg( + name='c', + dtype=np.float64, + shape=lp.auto), '...'] + ) + + parent_knl = lp.make_kernel( + "{[i, j, k, l, m]: 0<=i, j, k, l, m<16}", + """ + z[i, j, k, l, m] = linear_combo[j, l](x[i, j, k, l, m], + y[i, j, k, l, m]) + """, + kernel_data=[ + lp.GlobalArg( + name='x', + dtype=np.float64, + shape=(16, 16, 16, 16, 16)), + lp.GlobalArg( + name='y', + dtype=np.float64, + shape=(16, 16, 16, 16, 16)), + lp.GlobalArg( + name='z', + dtype=np.float64, + shape=(16, 16, 16, 16, 16)), '...'], + ) + + knl = lp.register_callable_kernel(parent_knl, 'linear_combo', child_knl) + + evt, (out, ) = knl(queue, x=x_dev, y=y_dev, z=z_dev) + + x_host = x_dev.get() + y_host = y_dev.get() + + assert (np.linalg.norm(2*x_host+3*y_host-out.get())/( + np.linalg.norm(2*x_host+3*y_host))) < 1e-15 + + def test_rename_argument(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) -- GitLab From 7b25d2b6ccc6dd76989c2c1b96a3f11d44b96f5e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 24 Feb 2018 21:48:17 -0600 Subject: [PATCH 043/116] Minor fix --- loopy/transform/register_knl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 36447d1e1..0add40948 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -1,6 +1,6 @@ from __future__ import division, absolute_import -__copyright__ = "Copyright (C) 2018 Andreas Sir" +__copyright__ = "Copyright (C) 2018 Kaushik Kulkarni" __license__ = """ Permission is hereby granted, free of charge, to any person obtaining a copy -- GitLab From dfe68baa6a03412a4c0fe041de7db22802a5c5db Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 00:30:20 -0600 Subject: [PATCH 044/116] Removing debugging statements/digressions --- loopy/codegen/control.py | 2 -- loopy/kernel/__init__.py | 10 +--------- loopy/kernel/creation.py | 13 +------------ 3 files changed, 2 insertions(+), 23 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 68d374ece..e3e209726 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -248,8 +248,6 @@ def build_loop_nest(codegen_state, schedule_index): if not codegen_state.ast_builder.can_implement_conditionals: result = [] inner = generate_code_for_sched_index(codegen_state, schedule_index) - print(inner) - 1/0 if inner is not None: result.append(inner) return merge_codegen_results(codegen_state, result) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 043451943..e325d7a3f 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -183,12 +183,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: target A subclass of :class:`loopy.TargetBase`. - - .. attribue:: external_kernels - - A list of loopy kernels, which is invoked as a kernel within the - current kernel. This is a way to facilitate in-kernel calls as function - calls in loopy. """ # {{{ constructor @@ -220,9 +214,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): state=kernel_state.INITIAL, target=None, - overridden_get_grid_sizes_for_insn_ids=None, - - external_kernels=[]): + overridden_get_grid_sizes_for_insn_ids=None): """ :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get intersected in slab decomposition, their grid sizes shouldn't diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index e419e52d0..4a08c28bd 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -365,16 +365,6 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): % opt_value) result["mem_kind"] = opt_value - elif opt_key == "use_dedicated_func": - opt_value = opt_value.lower().strip() - if opt_value is None: - opt_value = 'true' - if opt_value not in ['true', 'false']: - raise LoopyError("Unknown Array Call specified type %s specified" - " expected, 'true' or 'false'." - % opt_value) - result["use_dedicated_func"] = (opt_value=='true') - else: raise ValueError( "unrecognized instruction option '%s' " @@ -443,7 +433,6 @@ SUBST_RE = re.compile( r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$") - def check_illegal_options(insn_options, insn_type): illegal_options = [] if insn_type not in ['gbarrier', 'lbarrier']: @@ -964,7 +953,7 @@ def parse_instructions(instructions, defines): new_instructions.append(insn) inames_to_dup.append(insn_inames_to_dup) continue - + insn_match = EMPTY_LHS_INSN_RE.match(insn) if insn_match is not None: insn, insn_inames_to_dup = parse_insn( -- GitLab From 0875d640c7de0f54b6ce354c9376d7b70cde4b50 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 00:36:51 -0600 Subject: [PATCH 045/116] Removed debug statements --- loopy/kernel/instruction.py | 11 +++++------ loopy/library/function.py | 1 + loopy/symbolic.py | 10 ---------- loopy/target/c/__init__.py | 1 + 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index a2a601e5a..c39470fe4 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1213,12 +1213,11 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): if not isinstance(expression, (Call, Reduction)): raise LoopyError("right-hand side in multiple assignment must be " "function call or reduction, got: '%s'" % expression) - if isinstance(expression, (Call, Reduction)): - return CallInstruction( - assignees=assignees, - expression=expression, - temp_var_types=temp_var_types, - **kwargs) + return CallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) from loopy.symbolic import ArrayCall if isinstance(expression, ArrayCall): diff --git a/loopy/library/function.py b/loopy/library/function.py index d0706df8d..9d557ac9f 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -55,4 +55,5 @@ def tuple_function_mangler(kernel, name, arg_dtypes): return None + # vim: foldmethod=marker diff --git a/loopy/symbolic.py b/loopy/symbolic.py index a1a5517b7..e54558d31 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -110,8 +110,6 @@ class IdentityMapperMixin(object): return ArrayCall(expr.func_call, expr.inames) - - map_type_cast = map_type_annotation map_linear_subscript = IdentityMapperBase.map_subscript @@ -179,8 +177,6 @@ class WalkMapper(WalkMapperBase): class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant - # FXIME: Need to undertand what would this be - class CombineMapper(CombineMapperBase): def map_reduction(self, expr): @@ -188,10 +184,6 @@ class CombineMapper(CombineMapperBase): map_linear_subscript = CombineMapperBase.map_subscript - def map_reduction(self, expr): - # FIXME: Come up with a final definition - return self.rec(expr.rhs) - class SubstitutionMapper( CSECachingMapperMixin, SubstitutionMapperBase, IdentityMapperMixin): @@ -282,8 +274,6 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase): from pymbolic.mapper.unifier import unify_many return unify_many(urecs, new_uni_record) - # FIXME: Need to add mapper method for array_call - class DependencyMapper(DependencyMapperBase): def map_group_hw_index(self, expr): diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e2aaaded8..d05d8bbfb 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -738,6 +738,7 @@ class CASTBuilder(ASTBuilderBase): def get_global_arg_decl(self, name, shape, dtype, is_written): from cgen import RestrictPointer, Const + arg_decl = RestrictPointer(POD(self, dtype, name)) if not is_written: -- GitLab From c8b15c23fce36a4ef15211c95db6c58ebdfb7796 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 01:28:41 -0600 Subject: [PATCH 046/116] Minor fixes --- loopy/codegen/instruction.py | 2 +- loopy/kernel/__init__.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index aad755ee5..c027d9579 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -83,7 +83,7 @@ def generate_instruction_code(codegen_state, insn): elif isinstance(insn, CallInstruction): ast = generate_call_code(codegen_state, insn) elif isinstance(insn, ArrayCallInstruction): - ast = generate_array_call_code(codegen_state,insn) + ast = generate_array_call_code(codegen_state, insn) elif isinstance(insn, CInstruction): ast = generate_c_instruction_code(codegen_state, insn) else: diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index e325d7a3f..038ef23ac 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -289,7 +289,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): options=options, state=state, target=target, - external_kernels=external_kernels, overridden_get_grid_sizes_for_insn_ids=( overridden_get_grid_sizes_for_insn_ids)) -- GitLab From 5df73702f7dcea3dbd20fb6b5c7e16a4d90bbf9c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 01:55:39 -0600 Subject: [PATCH 047/116] Placate Flake8 --- loopy/__init__.py | 3 +-- loopy/kernel/instruction.py | 1 - loopy/symbolic.py | 1 + loopy/transform/register_knl.py | 7 ++++--- loopy/type_inference.py | 3 +-- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 48f615c1b..9c39a38be 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -40,8 +40,7 @@ from loopy.kernel.instruction import ( memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, InstructionBase, MultiAssignmentBase, Assignment, ExpressionInstruction, - CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction, - ArrayCallInstruction) + CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction) from loopy.kernel.data import ( auto, KernelArgument, diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index c39470fe4..335cd979c 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1177,7 +1177,6 @@ class ArrayCallInstruction(MultiAssignmentBase): return assignees_stride, params_stride - def __str__(self): result = "%s: %s <- %s" % (self.id, ", ".join(str(a) for a in self.assignees), diff --git a/loopy/symbolic.py b/loopy/symbolic.py index e54558d31..9dffb4cd1 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -729,6 +729,7 @@ class RuleArgument(p.Expression): mapper_method = intern("map_rule_argument") + class PointeredSubscript(p.Expression): """ This is used to give out the pointer support so that they could be implemented in function calls for arrays. diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 0add40948..8f2426bd2 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -99,7 +99,8 @@ def get_mangler_and_preamble(child_func_name, child_body): def get_child_body(insn, child_knl, parent_knl): - child_knl = child_knl.copy(name=insn.expression.func_call.function.name + "_" + insn.id) + child_knl = child_knl.copy( + name=insn.expression.func_call.function.name + "_" + insn.id) parent_params = [] parent_assignees = [] @@ -197,8 +198,8 @@ def register_callable_kernel(parent, func_name, child): # connection insns.append(insn) elif isinstance(insn, (Assignment, NoOpInstruction, - BarrierInstruction, Assignment, CallInstruction, - _DataObliviousInstruction)): + BarrierInstruction, Assignment, + CallInstruction, _DataObliviousInstruction)): pass else: raise NotImplementedError("register_knl not made for %s type of" diff --git a/loopy/type_inference.py b/loopy/type_inference.py index e55648aaa..c9c1cedfb 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -287,7 +287,6 @@ class TypeInferenceMapper(CombineMapper): % (identifier, len(arg_dtypes))) def map_variable(self, expr): - from pymbolic import var if expr.name in self.kernel.all_inames(): return [self.kernel.index_dtype] @@ -470,7 +469,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): assert found if result_i is not None: result.append(result_i) - + debug(" result: %s", result) dtype_sets.append(result) -- GitLab From c01f09b0ca836ea08ce704c0678b9b2bf501d6cc Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 17:03:41 -0600 Subject: [PATCH 048/116] Now able to handle SubArrayRefs. Need to handle backend. Go. Go Go! --- loopy/symbolic.py | 152 ++++++++++++++++++++++++++++------------------ 1 file changed, 92 insertions(+), 60 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 9dffb4cd1..3f81ae9f7 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -107,8 +107,11 @@ class IdentityMapperMixin(object): return type(expr)(expr.type, self.rec(expr.child)) def map_array_call(self, expr, *args): - return ArrayCall(expr.func_call, - expr.inames) + return ArrayCall(expr.function, + expr.parameters) + + def map_sub_array_ref(self, expr, *args): + return SubArrayRef(expr.inner_inames, expr.subscr) map_type_cast = map_type_annotation @@ -168,11 +171,19 @@ class WalkMapper(WalkMapperBase): map_rule_argument = map_group_hw_index def map_array_call(self, expr, *args): + 1/0 if not self.visit(expr): return return self.map_call(expr.func_call, *args) + def map_sub_array_ref(self, expr, *args): + if not self.visit(expr): + return + + self.rec(expr.inner_inames) + self.rec(expr.subscr) + class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant @@ -241,11 +252,17 @@ class StringifyMapper(StringifyMapperBase): return "cast(%s, %s)" % (repr(expr.type), self.rec(expr.child, PREC_NONE)) def map_array_call(self, expr, prec): + 1/0 return "ArrayCall({func_name}[{inames}]({rhs}))".format( func_name=expr.func_call.function.name, inames=expr.inames, rhs=expr.func_call.parameters) + def map_sub_array_ref(self, expr, prec): + return "SubArrayRef({inames}, ({subscr}))".format( + inames=self.rec(expr.inner_inames, prec), + subscr=self.rec(expr.subscr, prec)) + class UnidirectionalUnifier(UnidirectionalUnifierBase): def map_reduction(self, expr, other, unis): @@ -298,8 +315,11 @@ class DependencyMapper(DependencyMapperBase): def map_loopy_function_identifier(self, expr): return set() - def map_array_call(self, expr, *args): - return self.map_call(expr.func_call) + def map_sub_array_ref(self, expr, *args): + deps = self.rec(expr.subscr) + return deps - set(iname for iname in expr.inner_inames) + + map_array_call = map_call def map_pointered_subscript(self, expr, *args): raise NotImplementedError("Dependency Mapper needs to be implemented" @@ -620,73 +640,79 @@ class Reduction(p.Expression): mapper_method = intern("map_reduction") -class ArrayCall(p.Expression): - """Represents an function call over an array across :attr: `inames`. - - .. attribute:: func - - a string indicating the name of the function that is supposed to be - called. For ex. `'sin'`, `'cos'` +class SubArrayRef(p.Expression): + """Represents a generalized sliced notation of an array. - .. attribute:: inames + .. attribute:: inner_inames - a tuple of inames across which the function is to be called. + These are a tuple of sweeping inames over the array. - .. attribute:: rhs + .. attribute:: subscr - an instance of :class: `pymbolic.primtives.Expression` + The subscript whose adress space is to be referenced """ - init_arg_names = ("func_call", "inames") - - def __init__(self, _func_call, _inames): - - # {{{ Input Sanity Checks - assert isinstance(_func_call, p.Call) + init_arg_names = ("inner_inames", "subscr") + def __init__(self, _inames, _subscr): + # {{{ Sanity Checks assert isinstance(_inames, tuple) for iname in _inames: assert isinstance(iname, p.Variable) - for par in _func_call.parameters: - assert isinstance(par, p.Subscript) - + assert isinstance(_subscr, p.Subscript) # }}} - - self.func_call = _func_call - self.inames = _inames + self.inner_inames = _inames + self.subscr = _subscr + + def get_begin_subscript(self): + starting_inames = [] + for iname in self.subscr.index_tuple: + if iname in self.inner_inames: + starting_inames.append(parse('0')) + else: + starting_inames.append(iname) + return p.Subscript(self.subscr.aggregate, tuple(starting_inames)) def __getinitargs__(self): - return (self.func_call, self.inames) + return (self.inner_inames, self.subscr) def get_hash(self): - return hash((self.__class__, self.func_call, self.inames)) + return hash((self.__class__, self.inner_inames, self.subscr)) def is_equal(self, other): return (other.__class__ == self.__class__ - and other.func_call == self.func_call - and other.inames == self.inames) + and other.subscr == self.subscr + and other.inner_inames == self.inner_inames) def stringifier(self): return StringifyMapper + mapper_method = intern("map_sub_array_ref") + + +class ArrayCall(p.Call): + """Represents an function call over an array across :attr: `inames`. + """ + + def __init__(self, function, parameters): + + # {{{ Input Sanity Checks + assert isinstance(function, p.Variable) + assert isinstance(parameters, tuple) + + for par in parameters: + assert isinstance(par, SubArrayRef) + # }}} + + self.function = function + self.parameters = parameters + def get_params_as_starting_subs(self): result = [] for par in self.func_call.parameters: - starting_inames = [] - for iname in par.index_tuple: - if iname in self.inames: - starting_inames.append(parse('0')) - else: - starting_inames.append(iname) - result.append( - p.Subscript(par.aggregate, tuple(starting_inames))) + result.append(par.get_begin_address()) return tuple(result) - @property - @memoize_method - def inames_set(self): - return set(self.inames) - mapper_method = intern("map_array_call") @@ -1150,24 +1176,18 @@ class FunctionToPrimitiveMapper(IdentityMapper): return Reduction(operation, tuple(processed_inames), red_exprs, allow_simultaneous=allow_simultaneous) - def _parse_array_call(self, func_call, inames): - # {{{ Input sanity check - if isinstance(inames, p.Variable): - inames = (inames, ) - if not isinstance(inames, (tuple)): - raise TypeError("iname argument to array_call() must be a symbol " - "or a tuple of symbols") - # }}} - - return ArrayCall(func_call, inames) - def map_call(self, expr): from loopy.library.reduction import parse_reduction_op - if isinstance(expr.function, p.Subscript): - func = expr.function - return self._parse_array_call(p.Call(func.aggregate, - expr.parameters), func.index) + # {{{ Handling ArrayCalls + encountered_sub_array_ref = False + for par in expr.parameters: + if isinstance(par, SubArrayRef): + encountered_sub_array_ref = True + break + if encountered_sub_array_ref: + return ArrayCall(expr.function, expr.parameters) + # }}} if not isinstance(expr.function, p.Variable): return IdentityMapper.map_call(self, expr) @@ -1257,7 +1277,8 @@ class LoopyParser(ParserBase): return float(val) # generic float def parse_prefix(self, pstate): - from pymbolic.parser import _PREC_UNARY, _less, _greater, _identifier + from pymbolic.parser import (_PREC_UNARY, _less, _greater, _identifier, + _openbracket, _closebracket, _colon) if pstate.is_next(_less): pstate.advance() if pstate.is_next(_greater): @@ -1273,6 +1294,17 @@ class LoopyParser(ParserBase): return TypeAnnotation( typename, self.parse_expression(pstate, _PREC_UNARY)) + elif pstate.is_next(_openbracket): + pstate.advance() + pstate.expect_not_end() + inner_inames = self.parse_expression(pstate) + pstate.expect(_closebracket) + pstate.advance() + pstate.expect(_colon) + pstate.advance() + subscript = self.parse_expression(pstate, _PREC_UNARY) + return SubArrayRef(inner_inames, subscript) + else: return super(LoopyParser, self).parse_prefix(pstate) -- GitLab From 59d6f4de80e8319644f47cc980e6babd98dea183 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 17:11:12 -0600 Subject: [PATCH 049/116] Made changes so that the parse_insn accepts SubArrayRef --- loopy/kernel/creation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 4a08c28bd..6aefb9083 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -497,14 +497,16 @@ def parse_insn(groups, insn_options): if isinstance(inner_lhs_i, Lookup): inner_lhs_i = inner_lhs_i.aggregate - from loopy.symbolic import LinearSubscript + from loopy.symbolic import LinearSubscript, SubArrayRef if isinstance(inner_lhs_i, Variable): assignee_names.append(inner_lhs_i.name) elif isinstance(inner_lhs_i, (Subscript, LinearSubscript)): assignee_names.append(inner_lhs_i.aggregate.name) + elif isinstance(inner_lhs_i, (SubArrayRef)): + assignee_names.append(inner_lhs_i.subscr.aggregate.name) else: raise LoopyError("left hand side of assignment '%s' must " - "be variable or subscript" % (lhs_i,)) + "be variable, subscript or SubArrayRef" % (lhs_i,)) new_lhs.append(lhs_i) -- GitLab From decb974e3695ab676fcee85f47b689b9d359eb30 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 17:16:30 -0600 Subject: [PATCH 050/116] Made changes so as to take in SubArrayRef --- loopy/kernel/instruction.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 335cd979c..b13ad77a6 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -505,7 +505,7 @@ def _get_assignee_var_name(expr): def _get_assignee_subscript_deps(expr): from pymbolic.primitives import Variable, Subscript, Lookup - from loopy.symbolic import LinearSubscript, get_dependencies + from loopy.symbolic import LinearSubscript, get_dependencies, SubArrayRef if isinstance(expr, Lookup): expr = expr.aggregate @@ -516,6 +516,8 @@ def _get_assignee_subscript_deps(expr): return get_dependencies(expr.index) elif isinstance(expr, LinearSubscript): return get_dependencies(expr.index) + elif isinstance(expr, SubArrayRef): + return get_dependencies(expr.index) else: raise RuntimeError("invalid lvalue '%s'" % expr) @@ -1109,9 +1111,10 @@ class ArrayCallInstruction(MultiAssignmentBase): "--got '%s'" % type(assignees).__name__) from pymbolic.primitives import Variable, Subscript - from loopy.symbolic import LinearSubscript + from loopy.symbolic import LinearSubscript, SubArrayRef for assignee in assignees: - if not isinstance(assignee, (Variable, Subscript, LinearSubscript)): + if not isinstance(assignee, (Variable, Subscript, LinearSubscript, + SubArrayRef)): raise LoopyError("invalid lvalue '%s'" % assignee) self.assignees = assignees -- GitLab From bf9eb19300cdbc7df4cb8fdbb0793ceccbccbcab Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 18:44:40 -0600 Subject: [PATCH 051/116] Fixes Dependency mapper for SubArrayRef --- loopy/symbolic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 3f81ae9f7..24ae4ade4 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -316,7 +316,7 @@ class DependencyMapper(DependencyMapperBase): return set() def map_sub_array_ref(self, expr, *args): - deps = self.rec(expr.subscr) + deps = self.rec(expr.subscr, *args) return deps - set(iname for iname in expr.inner_inames) map_array_call = map_call -- GitLab From dd51baf585a3218fe6837a7c540fa455e99e6eab Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 18:50:32 -0600 Subject: [PATCH 052/116] Changes accomodating SubArrayRef --- loopy/kernel/instruction.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index b13ad77a6..38bf63fb0 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -480,7 +480,7 @@ class InstructionBase(ImmutableRecord): def _get_assignee_var_name(expr): from pymbolic.primitives import Variable, Subscript, Lookup - from loopy.symbolic import LinearSubscript + from loopy.symbolic import LinearSubscript, SubArrayRef if isinstance(expr, Lookup): expr = expr.aggregate @@ -499,6 +499,13 @@ def _get_assignee_var_name(expr): assert isinstance(agg, Variable) return agg.name + + elif isinstance(expr, SubArrayRef): + agg = expr.subscr.aggregate + assert isinstance(agg, Variable) + + return agg.name + else: raise RuntimeError("invalid lvalue '%s'" % expr) @@ -517,7 +524,7 @@ def _get_assignee_subscript_deps(expr): elif isinstance(expr, LinearSubscript): return get_dependencies(expr.index) elif isinstance(expr, SubArrayRef): - return get_dependencies(expr.index) + return get_dependencies(expr.get_begin_subscript().index) else: raise RuntimeError("invalid lvalue '%s'" % expr) @@ -1121,8 +1128,7 @@ class ArrayCallInstruction(MultiAssignmentBase): self.expression = expression # FIXME: Currently assumes that all the assignees share the same inames - self.within_inames = frozenset(set(self.assignee_subscript_deps()[0]) - - set(iname.name for iname in expression.inames_set)) + self.within_inames = frozenset(set(self.assignee_subscript_deps()[0])) if temp_var_types is None: self.temp_var_types = (None,) * len(self.assignees) -- GitLab From 9851ea443d0b1bada3037b8a69bb4b3871fb3883 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 19:22:51 -0600 Subject: [PATCH 053/116] Makes changes to map_array_call in WalkMapper --- loopy/symbolic.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 24ae4ade4..5a17b2606 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -170,19 +170,18 @@ class WalkMapper(WalkMapperBase): map_rule_argument = map_group_hw_index - def map_array_call(self, expr, *args): - 1/0 + def map_sub_array_ref(self, expr, *args): if not self.visit(expr): return - return self.map_call(expr.func_call, *args) + self.rec(expr.inner_inames, *args) + self.rec(expr.subscr, *args) - def map_sub_array_ref(self, expr, *args): + def map_array_call(self, expr, *args): if not self.visit(expr): return - self.rec(expr.inner_inames) - self.rec(expr.subscr) + return self.map_call(expr, *args) class CallbackMapper(CallbackMapperBase, IdentityMapper): -- GitLab From 8a3768d1ccca919426ffd3b6cdc57b88b0b4918e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 19:27:36 -0600 Subject: [PATCH 054/116] Made Changes to accomodate SubArrayRef --- loopy/transform/register_knl.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 8f2426bd2..43eb45aad 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -100,21 +100,22 @@ def get_mangler_and_preamble(child_func_name, child_body): def get_child_body(insn, child_knl, parent_knl): child_knl = child_knl.copy( - name=insn.expression.func_call.function.name + "_" + insn.id) + name=insn.expression.function.name + "_" + insn.id) parent_params = [] parent_assignees = [] - for par in insn.expression.func_call.parameters: - parent_params.append(par.aggregate.name) + for par in insn.expression.parameters: + parent_params.append(par.subscr.aggregate.name) for ass in insn.assignees: - parent_assignees.append(ass.aggregate.name) + parent_assignees.append(ass.subscr.aggregate.name) # {{{ creating the parent to child parameter association dictionary parent_arg_to_child = {} for child_par, parent_par in zip(child_knl.args, - insn.expression.func_call.parameters + insn.assignees): - parent_arg_to_child[parent_par.aggregate.name] = child_par.name # noqa + insn.expression.parameters + insn.assignees): + parent_arg_to_child[parent_par.subscr.aggregate.name] = ( + child_par.name) # }}} @@ -193,7 +194,7 @@ def register_callable_kernel(parent, func_name, child): for insn in parent.instructions: if isinstance(insn, ArrayCallInstruction): - if insn.expression.func_call.function.name == func_name: + if insn.expression.function.name == func_name: # only collecting the array instructions that are needed by the # connection insns.append(insn) -- GitLab From 28d3aafddd46d0507bc3bdb713062e4ec91e10fe Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 22:47:18 -0600 Subject: [PATCH 055/116] Shortening work involved in register_knl --- loopy/kernel/instruction.py | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 38bf63fb0..e78e9cc50 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1128,6 +1128,7 @@ class ArrayCallInstruction(MultiAssignmentBase): self.expression = expression # FIXME: Currently assumes that all the assignees share the same inames + # FIXME: Change this change this CHANGE THIS... self.within_inames = frozenset(set(self.assignee_subscript_deps()[0])) if temp_var_types is None: @@ -1156,35 +1157,14 @@ class ArrayCallInstruction(MultiAssignmentBase): # }}} - def assignee_and_parameters_iname_position(self): - assignees_stride = [] - params_stride = [] - inner_inames = self.expression.inames + def get_parameters_dim_tags_dict(self, arg_dict): + dim_tags_dict = {} + for par in (self.assignees + self.expression.parameters): + arg_name = par.subscr.aggregate.name + dim_tags_dict[arg_name] = (par.get_inner_stride( + arg_dict[arg_name].dim_tags.stride)) - for assignee in self.assignees: - assignee_iname_dict = {} - assignee_stride = [] - - for i, iname in enumerate(assignee.index_tuple): - assignee_iname_dict[iname] = i - for iname in inner_inames: - if iname in assignee_iname_dict: - assignee_stride.append(assignee_iname_dict[iname]) - assignees_stride.append(assignee_stride) - - for par in self.expression.func_call.parameters: - param_iname_dict = {} - param_stride = [] - - for i, iname in enumerate(par.index_tuple): - param_iname_dict[iname] = i - - for iname in inner_inames: - if iname in param_iname_dict: - param_stride.append(param_iname_dict[iname]) - params_stride.append(param_stride) - - return assignees_stride, params_stride + return dim_tags_dict def __str__(self): result = "%s: %s <- %s" % (self.id, -- GitLab From 53a4be20790bdf6ab303166acb5c2935de3aa885 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 22:47:46 -0600 Subject: [PATCH 056/116] Added support to get inner dim tags --- loopy/symbolic.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 5a17b2606..019072c25 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -672,6 +672,19 @@ class SubArrayRef(p.Expression): starting_inames.append(iname) return p.Subscript(self.subscr.aggregate, tuple(starting_inames)) + def get_inner_dim_tags(self, arg_dim_tags): + """ Gives the dim tags for the inner + This would be used for stride calculation in the child kernel. + This might need to go, once we start calculating the stride length + using the upper and lower bounds of the involved inames. + """ + from loopy.kernel.array import FixedStrideArrayDimTag as DimTag + inner_dim_tags = [] + for dim_tag, iname in zip(arg_dim_tags, self.inner_inames): + if iname in self.inner_inames: + inner_dim_tags.append(DimTag(dim_tag.stride)) + return inner_dim_tags + def __getinitargs__(self): return (self.inner_inames, self.subscr) -- GitLab From 6fe5bc2312605640e4cd6e27c98c51cba2ccdd2e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:14:04 -0600 Subject: [PATCH 057/116] Reduced the size of the stride calculation code --- loopy/transform/register_knl.py | 38 ++++++--------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 43eb45aad..bb8ae0644 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -101,48 +101,24 @@ def get_mangler_and_preamble(child_func_name, child_body): def get_child_body(insn, child_knl, parent_knl): child_knl = child_knl.copy( name=insn.expression.function.name + "_" + insn.id) - parent_params = [] - parent_assignees = [] - - for par in insn.expression.parameters: - parent_params.append(par.subscr.aggregate.name) - for ass in insn.assignees: - parent_assignees.append(ass.subscr.aggregate.name) + dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) # {{{ creating the parent to child parameter association dictionary - parent_arg_to_child = {} + child_arg_to_parent = {} for child_par, parent_par in zip(child_knl.args, insn.expression.parameters + insn.assignees): - parent_arg_to_child[parent_par.subscr.aggregate.name] = ( - child_par.name) + child_arg_to_parent[child_par.subscr.aggregate.name] = ( + parent_par.name) # }}} - assignees_stride, params_stride = insn.assignee_and_parameters_iname_position() # noqa - - new_strides_dim_tag_dict = {} - - for par, par_stride in zip(parent_params, params_stride): - param_dim_tags = [] - for i, index in enumerate(par_stride): - param_dim_tags.append(DimTag( - parent_knl.arg_dict[par].dim_tags[index].stride)) - new_strides_dim_tag_dict[parent_arg_to_child[par]] = ( - param_dim_tags) - - for assignee, assignee_stride in zip(parent_assignees, assignees_stride): - assignee_dim_tags = [] - for i, index in enumerate(assignee_stride): - assignee_dim_tags.append(DimTag( - parent_knl.arg_dict[assignee].dim_tags[index].stride)) - new_strides_dim_tag_dict[parent_arg_to_child[assignee]] = ( - assignee_dim_tags) - new_args = [] for arg in child_knl.args: + child_dim_tag = ( + dim_tags_dict[child_arg_to_parent[arg.name]]) new_args.append( - arg.copy(dim_tags=(new_strides_dim_tag_dict[arg.name]))) + arg.copy(dim_tags=child_dim_tag)) child_knl = child_knl.copy(args=new_args) child_func = generate_code_v2(child_knl).device_programs[0] -- GitLab From ac4d1f2ec70a05f9923281d3f030a9e62f79b68f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:16:51 -0600 Subject: [PATCH 058/116] Fixes error in function name --- loopy/kernel/instruction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index e78e9cc50..d56e24c34 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1157,12 +1157,12 @@ class ArrayCallInstruction(MultiAssignmentBase): # }}} - def get_parameters_dim_tags_dict(self, arg_dict): + def get_parameters_dim_tag_dict(self, arg_dict): dim_tags_dict = {} for par in (self.assignees + self.expression.parameters): arg_name = par.subscr.aggregate.name - dim_tags_dict[arg_name] = (par.get_inner_stride( - arg_dict[arg_name].dim_tags.stride)) + dim_tags_dict[arg_name] = par.get_inner_dim_tags( + arg_dict[arg_name].dim_tags) return dim_tags_dict -- GitLab From a6e95b4c0548640df70645c0697a7d379a1a9188 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:19:27 -0600 Subject: [PATCH 059/116] Fixes minor bugs --- loopy/transform/register_knl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index bb8ae0644..bd0fcb8be 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -108,8 +108,8 @@ def get_child_body(insn, child_knl, parent_knl): child_arg_to_parent = {} for child_par, parent_par in zip(child_knl.args, insn.expression.parameters + insn.assignees): - child_arg_to_parent[child_par.subscr.aggregate.name] = ( - parent_par.name) + child_arg_to_parent[child_par.name] = ( + parent_par.subscr.aggregate.name) # }}} -- GitLab From 4a879184b6d27a7b5f9949c0cca0f8e8b8409f0c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:25:42 -0600 Subject: [PATCH 060/116] Adjusted to the new system of loopy specific primitives --- loopy/symbolic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 019072c25..12cc76190 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -721,8 +721,8 @@ class ArrayCall(p.Call): def get_params_as_starting_subs(self): result = [] - for par in self.func_call.parameters: - result.append(par.get_begin_address()) + for par in self.parameters: + result.append(par.get_begin_subscript()) return tuple(result) mapper_method = intern("map_array_call") -- GitLab From e15f18228d4ca4b0cacffa434c409f33670ce3cf Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:44:31 -0600 Subject: [PATCH 061/116] Adde d type inference for SubArrayref --- loopy/type_inference.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index c9c1cedfb..71d8efc9a 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -399,8 +399,10 @@ class TypeInferenceMapper(CombineMapper): return [expr.operation.result_dtypes(self.kernel, rec_result)[0] for rec_result in rec_results] - def map_array_call(self, expr, return_tuple=False): - return self.map_call(expr.func_call, return_tuple) + def map_sub_array_ref(self, expr): + return self.rec(expr.subscr) + + map_array_call = map_call def map_pointered_subscript(self, expr): return self.rec(expr.subscript.aggregate) -- GitLab From 506c9ca2fdbbc5cff796261368925fc488af202a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:45:33 -0600 Subject: [PATCH 062/116] Changes to accomodate SubArrayRef --- loopy/target/c/__init__.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index d05d8bbfb..948ae702b 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -965,23 +965,11 @@ class CASTBuilder(ASTBuilderBase): ecm = codegen_state.expression_to_code_mapper from pymbolic.mapper.stringifier import PREC_NONE - from pymbolic.primitives import Subscript from loopy.symbolic import PointeredSubscript as Pvar - from loopy.symbolic import parse - func_id = insn.expression.func_call.function.name + "_" + str(insn.id) + func_id = insn.expression.function.name + "_" + str(insn.id) parameters = insn.expression.get_params_as_starting_subs() - assignments = [] - for ass in insn.assignees: - starting_inames = [] - for iname in ass.index_tuple: - if iname in insn.expression.inames: - starting_inames.append(parse('0')) - else: - starting_inames.append(iname) - assignments.append( - Subscript(ass.aggregate, tuple(starting_inames))) - assignments = tuple(assignments) + assignments = tuple([a.get_begin_subscript() for a in insn.assignees]) assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) -- GitLab From 095badea032d6255b98f6d57b2243f5255b9d41b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 25 Feb 2018 23:56:06 -0600 Subject: [PATCH 063/116] Streamlined the code a bit --- loopy/symbolic.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 12cc76190..5f4df15d5 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -251,11 +251,7 @@ class StringifyMapper(StringifyMapperBase): return "cast(%s, %s)" % (repr(expr.type), self.rec(expr.child, PREC_NONE)) def map_array_call(self, expr, prec): - 1/0 - return "ArrayCall({func_name}[{inames}]({rhs}))".format( - func_name=expr.func_call.function.name, - inames=expr.inames, - rhs=expr.func_call.parameters) + return "Array%s" % self.rec(expr, prec) def map_sub_array_ref(self, expr, prec): return "SubArrayRef({inames}, ({subscr}))".format( @@ -680,9 +676,10 @@ class SubArrayRef(p.Expression): """ from loopy.kernel.array import FixedStrideArrayDimTag as DimTag inner_dim_tags = [] - for dim_tag, iname in zip(arg_dim_tags, self.inner_inames): + for dim_tag, iname in zip(arg_dim_tags, self.subscr.index_tuple): if iname in self.inner_inames: inner_dim_tags.append(DimTag(dim_tag.stride)) + return inner_dim_tags def __getinitargs__(self): -- GitLab From ba5859b4d45c0b7222e07bd28bf6bce44e2204ee Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 26 Feb 2018 00:02:58 -0600 Subject: [PATCH 064/116] Made changes according SubArrayRef --- test/test_transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index c13b5f53a..0f3adc41b 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -210,8 +210,8 @@ def test_register_knl(ctx_factory): parent_knl = lp.make_kernel( "{[i, j, k, l, m]: 0<=i, j, k, l, m<16}", """ - z[i, j, k, l, m] = linear_combo[j, l](x[i, j, k, l, m], - y[i, j, k, l, m]) + [j, l]: z[i, j, k, l, m] = linear_combo([j, l]: x[i, j, k, l, m], + [j, l]:y[i, j, k, l, m]) """, kernel_data=[ lp.GlobalArg( -- GitLab From 6ed960ac40191fd1ff20e2af74254e853c6c88f9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 26 Feb 2018 00:04:55 -0600 Subject: [PATCH 065/116] Placate Flake8 --- loopy/symbolic.py | 2 +- loopy/transform/register_knl.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 5f4df15d5..a60339737 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -669,7 +669,7 @@ class SubArrayRef(p.Expression): return p.Subscript(self.subscr.aggregate, tuple(starting_inames)) def get_inner_dim_tags(self, arg_dim_tags): - """ Gives the dim tags for the inner + """ Gives the dim tags for the inner inames. This would be used for stride calculation in the child kernel. This might need to go, once we start calculating the stride length using the upper and lower bounds of the involved inames. diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index bd0fcb8be..e847acc3d 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -28,8 +28,6 @@ from loopy.kernel.instruction import (ArrayCallInstruction, Assignment, NoOpInstruction, BarrierInstruction, CallInstruction, _DataObliviousInstruction) -from loopy.kernel.array import FixedStrideArrayDimTag as DimTag - __doc__ = """ .. currentmodule:: loopy -- GitLab From de17c78cea18f4783aa6dcf60f80d0bf208d0b5a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 26 Feb 2018 11:19:21 -0600 Subject: [PATCH 066/116] Added pytest_cache to gitingore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5c9e73c7b..a7280e374 100644 --- a/.gitignore +++ b/.gitignore @@ -19,5 +19,6 @@ htmlcov .ipynb_checkpoints lextab.py yacctab.py +.pytest_cache .cache -- GitLab From 00c72fdb4a7b35374f0a70831616b8ab9d528a35 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 26 Feb 2018 11:24:30 -0600 Subject: [PATCH 067/116] changes the name from inner_inames to swept_inames --- loopy/symbolic.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index a60339737..754e88498 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -111,7 +111,7 @@ class IdentityMapperMixin(object): expr.parameters) def map_sub_array_ref(self, expr, *args): - return SubArrayRef(expr.inner_inames, expr.subscr) + return SubArrayRef(expr.swept_inames, expr.subscr) map_type_cast = map_type_annotation @@ -174,7 +174,7 @@ class WalkMapper(WalkMapperBase): if not self.visit(expr): return - self.rec(expr.inner_inames, *args) + self.rec(expr.swept_inames, *args) self.rec(expr.subscr, *args) def map_array_call(self, expr, *args): @@ -255,7 +255,7 @@ class StringifyMapper(StringifyMapperBase): def map_sub_array_ref(self, expr, prec): return "SubArrayRef({inames}, ({subscr}))".format( - inames=self.rec(expr.inner_inames, prec), + inames=self.rec(expr.swept_inames, prec), subscr=self.rec(expr.subscr, prec)) @@ -312,7 +312,7 @@ class DependencyMapper(DependencyMapperBase): def map_sub_array_ref(self, expr, *args): deps = self.rec(expr.subscr, *args) - return deps - set(iname for iname in expr.inner_inames) + return deps - set(iname for iname in expr.swept_inames) map_array_call = map_call @@ -638,7 +638,7 @@ class Reduction(p.Expression): class SubArrayRef(p.Expression): """Represents a generalized sliced notation of an array. - .. attribute:: inner_inames + .. attribute:: swept_inames These are a tuple of sweeping inames over the array. @@ -647,7 +647,7 @@ class SubArrayRef(p.Expression): The subscript whose adress space is to be referenced """ - init_arg_names = ("inner_inames", "subscr") + init_arg_names = ("swept_inames", "subscr") def __init__(self, _inames, _subscr): # {{{ Sanity Checks @@ -656,13 +656,13 @@ class SubArrayRef(p.Expression): assert isinstance(iname, p.Variable) assert isinstance(_subscr, p.Subscript) # }}} - self.inner_inames = _inames + self.swept_inames = _inames self.subscr = _subscr def get_begin_subscript(self): starting_inames = [] for iname in self.subscr.index_tuple: - if iname in self.inner_inames: + if iname in self.swept_inames: starting_inames.append(parse('0')) else: starting_inames.append(iname) @@ -677,21 +677,21 @@ class SubArrayRef(p.Expression): from loopy.kernel.array import FixedStrideArrayDimTag as DimTag inner_dim_tags = [] for dim_tag, iname in zip(arg_dim_tags, self.subscr.index_tuple): - if iname in self.inner_inames: + if iname in self.swept_inames: inner_dim_tags.append(DimTag(dim_tag.stride)) return inner_dim_tags def __getinitargs__(self): - return (self.inner_inames, self.subscr) + return (self.swept_inames, self.subscr) def get_hash(self): - return hash((self.__class__, self.inner_inames, self.subscr)) + return hash((self.__class__, self.swept_inames, self.subscr)) def is_equal(self, other): return (other.__class__ == self.__class__ and other.subscr == self.subscr - and other.inner_inames == self.inner_inames) + and other.swept_inames == self.swept_inames) def stringifier(self): return StringifyMapper @@ -1306,13 +1306,13 @@ class LoopyParser(ParserBase): elif pstate.is_next(_openbracket): pstate.advance() pstate.expect_not_end() - inner_inames = self.parse_expression(pstate) + swept_inames = self.parse_expression(pstate) pstate.expect(_closebracket) pstate.advance() pstate.expect(_colon) pstate.advance() subscript = self.parse_expression(pstate, _PREC_UNARY) - return SubArrayRef(inner_inames, subscript) + return SubArrayRef(swept_inames, subscript) else: return super(LoopyParser, self).parse_prefix(pstate) -- GitLab From fdee62383783917d12be7b57161c699b4dd56cd5 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 26 Feb 2018 15:27:18 -0600 Subject: [PATCH 068/116] Removed the function_manglers and preamble_generators present in the register_knl --- loopy/transform/register_knl.py | 84 ++++----------------------------- 1 file changed, 10 insertions(+), 74 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index e847acc3d..f88155ddb 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -34,69 +34,10 @@ __doc__ = """ .. autofunction:: register_callable_kernel """ -# {{{ Register Functions +# {{{ changing the argument strides -def register_preamble_generators(kernel, preamble_generators): - """ - :arg manglers: list of functions of signature ``(preamble_info)`` - generating tuples ``(sortable_str_identifier, code)``, - where *preamble_info* is a :class:`PreambleInfo`. - :returns: *kernel* with *manglers* registered - """ - new_pgens = kernel.preamble_generators[:] - for pgen in preamble_generators: - if pgen not in new_pgens: - new_pgens.insert(0, pgen) - - return kernel.copy(preamble_generators=new_pgens) - - -def register_function_manglers(kernel, manglers): - """ - :arg manglers: list of functions of signature `(target, name, arg_dtypes)` - returning a :class:`loopy.CallMangleInfo`. - :returns: *kernel* with *manglers* registered - """ - new_manglers = kernel.function_manglers[:] - for m in manglers: - if m not in new_manglers: - new_manglers.insert(0, m) - - return kernel.copy(function_manglers=new_manglers) - -# }}} - -# {{{ Generalized function mangler and preamble generator - - -def get_mangler_and_preamble(child_func_name, child_body): - - def child_mangler(kernel, name, arg_dtypes): - if name == child_func_name: - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - return None - - def child_preamble_generator(preamble_info): - for func in preamble_info.seen_functions: - if child_func_name == func.name: - yield("01_enable_child", - child_body) - break - - return child_mangler, child_preamble_generator - -# }}} - -# {{{ Changing the argument strides - - -def get_child_body(insn, child_knl, parent_knl): +def get_strided_child_knl(insn, child_knl, parent_knl): child_knl = child_knl.copy( name=insn.expression.function.name + "_" + insn.id) dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) @@ -119,11 +60,8 @@ def get_child_body(insn, child_knl, parent_knl): arg.copy(dim_tags=child_dim_tag)) child_knl = child_knl.copy(args=new_args) - child_func = generate_code_v2(child_knl).device_programs[0] - # FIXME: Need to remove the __kernel and reequired work group alignemnt - child_body = str(child_func.ast) - return child_body + return child_knl # }}} # {{{ main entrypoint @@ -178,22 +116,20 @@ def register_callable_kernel(parent, func_name, child): pass else: raise NotImplementedError("register_knl not made for %s type of" - "instruciton" % (str(type(insn)))) + "instruction" % (str(type(insn)))) # }}} # {{{ collecting the manglers and preambles for each instruction - manglers = [] - preambles = [] for insn in insns: - mangler, preamble = get_mangler_and_preamble(func_name+"_"+str(insn.id), - get_child_body(insn, child, parent)) - manglers.append(mangler) - preambles.append(preamble) + compliant_child_knl = get_strided_child_knl(insn, child, parent) # }}} - new_parent = register_preamble_generators(parent, preambles) - new_parent = register_function_manglers(new_parent, manglers) + # Over here lets add a change to the codegeneration state fo the kernel and + # return it. + cgs_old = parent.code_generation_state + cgs_new = cgs_old.copy(external_kernel=compliant_child_knl) + new_parent = parent.copy(code_generation_state=cgs_new) return new_parent -- GitLab From 883cbe8c94d51c5e02f9c47f0a590b07f6519f17 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 09:11:54 -0600 Subject: [PATCH 069/116] Added support for auxillary kernels --- loopy/kernel/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 038ef23ac..701183542 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -150,6 +150,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): a mapping from substitution names to :class:`SubstitutionRule` objects + .. attribute:: auxillary_kernels + + A list of kernels that get mapped to functions. + .. attribute:: iname_slab_increments a dictionary mapping inames to (lower_incr, @@ -196,6 +200,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): temporary_variables={}, iname_to_tag={}, substitutions={}, + auxillary_kernels=[], function_manglers=[ default_function_mangler, single_arg_function_mangler, @@ -281,6 +286,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): local_sizes=local_sizes, iname_to_tag=iname_to_tag, substitutions=substitutions, + auxillary_kernels=auxillary_kernels, cache_manager=cache_manager, applied_iname_rewrites=applied_iname_rewrites, function_manglers=function_manglers, -- GitLab From cee39d69a41c2e855608a78e1f42bfddd7f2295c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 09:12:36 -0600 Subject: [PATCH 070/116] Added support for regsitering an auxillary kernel --- loopy/transform/register_knl.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index f88155ddb..df93ddab0 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -23,7 +23,6 @@ THE SOFTWARE. """ from loopy.kernel import LoopKernel -from loopy.codegen import generate_code_v2 from loopy.kernel.instruction import (ArrayCallInstruction, Assignment, NoOpInstruction, BarrierInstruction, CallInstruction, _DataObliviousInstruction) @@ -64,6 +63,35 @@ def get_strided_child_knl(insn, child_knl, parent_knl): return child_knl # }}} +# {{{ register auxillary kernel + + +def register_auxillary_kernel(kernel, aux_kernel): + + def aux_mangler(kernel, name, arg_dtypes): + if name == aux_kernel.name: + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name=name, + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + return None + + new_manglers = kernel.function_manglers[:] + new_auxillary_kernels = kernel.auxillary_kernels + + if aux_mangler not in new_manglers: + new_manglers.insert(0, aux_mangler) + + if aux_kernel not in new_auxillary_kernels: + new_auxillary_kernels.append(aux_kernel) + + return kernel.copy(function_manglers=new_manglers, + auxillary_kernels=new_auxillary_kernels) + +# }}} + # {{{ main entrypoint -- GitLab From c540c9f476f1737e207915dc883adcf0559e2145 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 09:24:58 -0600 Subject: [PATCH 071/116] registers auxillary kernel into the parent kernel --- loopy/transform/register_knl.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index df93ddab0..b85a60f4f 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -79,7 +79,7 @@ def register_auxillary_kernel(kernel, aux_kernel): return None new_manglers = kernel.function_manglers[:] - new_auxillary_kernels = kernel.auxillary_kernels + new_auxillary_kernels = kernel.auxillary_kernels[:] if aux_mangler not in new_manglers: new_manglers.insert(0, aux_mangler) @@ -148,18 +148,14 @@ def register_callable_kernel(parent, func_name, child): # }}} - # {{{ collecting the manglers and preambles for each instruction + # {{{ transforming the kernel with the registered part for insn in insns: compliant_child_knl = get_strided_child_knl(insn, child, parent) - # }}} + parent = register_auxillary_kernel(parent, compliant_child_knl) - # Over here lets add a change to the codegeneration state fo the kernel and - # return it. - cgs_old = parent.code_generation_state - cgs_new = cgs_old.copy(external_kernel=compliant_child_knl) - new_parent = parent.copy(code_generation_state=cgs_new) + # }}} - return new_parent + return parent # }}} -- GitLab From fc0a61607198dff8633ce2f2cbd31d09f42d8209 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 09:40:32 -0600 Subject: [PATCH 072/116] added comment about the complacency --- loopy/transform/register_knl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index b85a60f4f..0afb77504 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -68,6 +68,8 @@ def get_strided_child_knl(insn, child_knl, parent_knl): def register_auxillary_kernel(kernel, aux_kernel): + # FIXME: needs checks whether it is compliant. + def aux_mangler(kernel, name, arg_dtypes): if name == aux_kernel.name: from loopy.kernel.data import CallMangleInfo -- GitLab From d2c10b4b900c6be34b42a52264dd0fcd36fb41cd Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 17:05:43 -0600 Subject: [PATCH 073/116] Removing stuff from register_knl as it is not the place we would need codegen stuff happening --- loopy/transform/register_knl.py | 72 +++++++-------------------------- 1 file changed, 14 insertions(+), 58 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 0afb77504..90d05afc5 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -23,9 +23,11 @@ THE SOFTWARE. """ from loopy.kernel import LoopKernel -from loopy.kernel.instruction import (ArrayCallInstruction, Assignment, +from loopy.kernel.instruction import ( + ArrayCallInstruction, Assignment, NoOpInstruction, BarrierInstruction, CallInstruction, _DataObliviousInstruction) +from loopy.diagnostic import LoopyError __doc__ = """ .. currentmodule:: loopy @@ -37,8 +39,6 @@ __doc__ = """ def get_strided_child_knl(insn, child_knl, parent_knl): - child_knl = child_knl.copy( - name=insn.expression.function.name + "_" + insn.id) dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) # {{{ creating the parent to child parameter association dictionary @@ -63,41 +63,11 @@ def get_strided_child_knl(insn, child_knl, parent_knl): return child_knl # }}} -# {{{ register auxillary kernel - - -def register_auxillary_kernel(kernel, aux_kernel): - - # FIXME: needs checks whether it is compliant. - - def aux_mangler(kernel, name, arg_dtypes): - if name == aux_kernel.name: - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - return None - - new_manglers = kernel.function_manglers[:] - new_auxillary_kernels = kernel.auxillary_kernels[:] - - if aux_mangler not in new_manglers: - new_manglers.insert(0, aux_mangler) - - if aux_kernel not in new_auxillary_kernels: - new_auxillary_kernels.append(aux_kernel) - - return kernel.copy(function_manglers=new_manglers, - auxillary_kernels=new_auxillary_kernels) - -# }}} # {{{ main entrypoint -def register_callable_kernel(parent, func_name, child): +def register_callable_kernel(parent, function_name, child): """ The purpose of this transformation is so that one can inoke the child kernel in the parent kernel. @@ -107,7 +77,7 @@ def register_callable_kernel(parent, func_name, child): This is the "main" kernel which will mostly remain unaltered and one can interpret it as stitching up the child kernel in the parent kernel. - :arg func_name + :arg function_name The name of the function call with which the child kernel must be associated in the parent kernel @@ -127,35 +97,21 @@ def register_callable_kernel(parent, func_name, child): assert isinstance(parent, LoopKernel) assert isinstance(child, LoopKernel) + assert isinstance(function_name, str) + assert function_name not in parent.auxillary_kernels, ( + "%s has already been used with some other kernel. One" + "function can only be associated with a single kernel" % ( + function_name)) # }}} - # {{{ Getting the instruction which have this function. - - insns = [] - - for insn in parent.instructions: - if isinstance(insn, ArrayCallInstruction): - if insn.expression.function.name == func_name: - # only collecting the array instructions that are needed by the - # connection - insns.append(insn) - elif isinstance(insn, (Assignment, NoOpInstruction, - BarrierInstruction, Assignment, - CallInstruction, _DataObliviousInstruction)): - pass - else: - raise NotImplementedError("register_knl not made for %s type of" - "instruction" % (str(type(insn)))) + # FIXME: needs checks whether it is compliant. + new_auxillary_kernels = kernel.auxillary_kernels + new_auxillary_kernels[function_name] = auxillary_kernel - # }}} + return kernel.copy(auxillary_kernels=new_auxillary_kernels) - # {{{ transforming the kernel with the registered part - for insn in insns: - compliant_child_knl = get_strided_child_knl(insn, child, parent) - parent = register_auxillary_kernel(parent, compliant_child_knl) - # }}} return parent -- GitLab From db46000431694b979c4de3b4aef218f74b1e8772 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 17:08:54 -0600 Subject: [PATCH 074/116] Fixes typo in regster_knl --- loopy/transform/register_knl.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 90d05afc5..538deff40 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -106,14 +106,10 @@ def register_callable_kernel(parent, function_name, child): # }}} # FIXME: needs checks whether it is compliant. - new_auxillary_kernels = kernel.auxillary_kernels - new_auxillary_kernels[function_name] = auxillary_kernel + new_auxillary_kernels = parent.auxillary_kernels + new_auxillary_kernels[function_name] = child - return kernel.copy(auxillary_kernels=new_auxillary_kernels) - - - - return parent + return parent.copy(auxillary_kernels=new_auxillary_kernels) # }}} -- GitLab From 792ac2667221ae918ba268ab886636fd3e010aab Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 17:10:11 -0600 Subject: [PATCH 075/116] Added auxillary kernels as a part of the LoopKernel --- loopy/kernel/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 701183542..01580caed 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -152,7 +152,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: auxillary_kernels - A list of kernels that get mapped to functions. + A dictionary of kernels that are to be mapped from their registered + function names .. attribute:: iname_slab_increments @@ -200,7 +201,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): temporary_variables={}, iname_to_tag={}, substitutions={}, - auxillary_kernels=[], + auxillary_kernels={}, function_manglers=[ default_function_mangler, single_arg_function_mangler, -- GitLab From 91a8c35cf785f7a243a7dc42dcc3cb571d2caeb0 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 18:03:23 -0600 Subject: [PATCH 076/116] Removed pointered subscript --- loopy/symbolic.py | 24 ------------------------ loopy/target/c/codegen/expression.py | 5 +++-- loopy/type_inference.py | 3 --- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 754e88498..e5b514b20 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -316,10 +316,6 @@ class DependencyMapper(DependencyMapperBase): map_array_call = map_call - def map_pointered_subscript(self, expr, *args): - raise NotImplementedError("Dependency Mapper needs to be implemented" - "for pointer variable") - map_linear_subscript = DependencyMapperBase.map_subscript def map_type_cast(self, expr): @@ -764,26 +760,6 @@ class RuleArgument(p.Expression): mapper_method = intern("map_rule_argument") - -class PointeredSubscript(p.Expression): - """ This is used to give out the pointer support so that they could be - implemented in function calls for arrays. - """ - - init_arg_name = ("subscript") - - def __init__(self, subscript): - self.subscript = subscript - assert isinstance(subscript, p.Subscript) - - def __getinitargs__(self): - return (self.expr) - - def stringifier(self): - return StringifyMapper - - mapper_method = intern("map_pointered_subscript") - # }}} diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index ab0d66783..3bb1e86b9 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -165,8 +165,9 @@ class ExpressionToCExpressionMapper(IdentityMapper): def map_tagged_variable(self, expr, type_context): return var(expr.name) - def map_pointered_subscript(self, expr, type_context): - return var("&")(self.map_subscript(expr.subscript, type_context)) + def map_sub_array_ref(self, expr, type_context): + return var("&")(self.rec(expr.get_begin_subscript(), + type_context)) def map_subscript(self, expr, type_context): def base_impl(expr, type_context): diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 71d8efc9a..3d44b4d74 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -404,9 +404,6 @@ class TypeInferenceMapper(CombineMapper): map_array_call = map_call - def map_pointered_subscript(self, expr): - return self.rec(expr.subscript.aggregate) - # }}} -- GitLab From 2b5b1fc7ef3e6e4ecd750c410dbee79a89e02a45 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 27 Feb 2018 18:21:18 -0600 Subject: [PATCH 077/116] Added mangler just where the instruction is created --- loopy/target/c/__init__.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 948ae702b..e944b4e9f 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -965,11 +965,10 @@ class CASTBuilder(ASTBuilderBase): ecm = codegen_state.expression_to_code_mapper from pymbolic.mapper.stringifier import PREC_NONE - from loopy.symbolic import PointeredSubscript as Pvar func_id = insn.expression.function.name + "_" + str(insn.id) - parameters = insn.expression.get_params_as_starting_subs() - assignments = tuple([a.get_begin_subscript() for a in insn.assignees]) + parameters = insn.expression.parameters + assignments = insn.assignees assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) @@ -978,26 +977,26 @@ class CASTBuilder(ASTBuilderBase): par_dtypes = tuple([ecm.infer_type(par) for par in parameters] + [ecm.infer_type(ass) for ass in insn.assignees]) - mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) - if mangle_result is None: + if insn.expression.function.name not in ( + codegen_state.kernel.auxillary_kernels): raise RuntimeError("function '%s' unknown--" - "maybe you need to register a function mangler?" - % func_id) + "maybe you need to register a callable kernel?" + % insn.expression.function.name) - assert mangle_result.arg_dtypes is not None + # FIXME: This has to be interpreted from the kernel + # Assumption: That the compatibilty checks would be done by + # `register_knl` + mangle_result = CallMangleInfo( + target_name=func_id, + result_dtypes=par_dtypes, + arg_dtypes=par_dtypes) - ''' - # Not exactly sure what this is doing will need to think about it - # later. - if mangle_result.target_name == "loopy_make_tuple": - # This shortcut avoids actually having to emit a 'make_tuple' function. - return self.emit_tuple_assignment(codegen_state, insn) - ''' + assert mangle_result.arg_dtypes is not None from loopy.expression import dtype_to_type_context c_parameters = [ - ecm(Pvar(par), PREC_NONE, + ecm(par, PREC_NONE, dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( -- GitLab From 2cb08c69e74a9eaff6cbdc1459654d64b86e07ab Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 04:53:00 -0600 Subject: [PATCH 078/116] Got it working. Need to make finishing touches --- loopy/codegen/__init__.py | 56 +++++++- loopy/codegen/auxillary_kernels.py | 210 +++++++++++++++++++++++++++++ loopy/codegen/result.py | 1 - loopy/target/opencl.py | 2 + 4 files changed, 263 insertions(+), 6 deletions(-) create mode 100644 loopy/codegen/auxillary_kernels.py diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index e83515d31..9d7945f3e 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -32,6 +32,10 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION +from cgen import Collection + +from loopy.kernel.instruction import ArrayCallInstruction + import logging logger = logging.getLogger(__name__) @@ -187,6 +191,11 @@ class CodeGenerationState(object): generated. .. attribute:: schedule_index_end + + .. attribute:: is_generating_master_kernel + + True of False indication if the code generation is happening for a + master kernel or auxillary kernels respectively. """ def __init__(self, kernel, @@ -196,7 +205,8 @@ class CodeGenerationState(object): vectorization_info=None, var_name_generator=None, is_generating_device_code=None, gen_program_name=None, - schedule_index_end=None): + schedule_index_end=None, + is_generating_master_kernel=True): self.kernel = kernel self.implemented_data_info = implemented_data_info self.implemented_domain = implemented_domain @@ -211,6 +221,7 @@ class CodeGenerationState(object): self.is_generating_device_code = is_generating_device_code self.gen_program_name = gen_program_name self.schedule_index_end = schedule_index_end + self.is_generating_master_kernel = is_generating_master_kernel # {{{ copy helpers @@ -219,7 +230,8 @@ class CodeGenerationState(object): var_subst_map=None, vectorization_info=None, is_generating_device_code=None, gen_program_name=None, - schedule_index_end=None): + schedule_index_end=None, + is_generating_master_kernel=None): if kernel is None: kernel = self.kernel @@ -242,6 +254,9 @@ class CodeGenerationState(object): if schedule_index_end is None: schedule_index_end = self.schedule_index_end + if is_generating_master_kernel is None: + is_generating_master_kernel = self.is_generating_master_kernel + return CodeGenerationState( kernel=kernel, implemented_data_info=implemented_data_info, @@ -257,7 +272,8 @@ class CodeGenerationState(object): var_name_generator=self.var_name_generator, is_generating_device_code=is_generating_device_code, gen_program_name=gen_program_name, - schedule_index_end=schedule_index_end) + schedule_index_end=schedule_index_end, + is_generating_master_kernel=is_generating_master_kernel) def copy_and_assign(self, name, value): """Make a copy of self with variable *name* fixed to *value*.""" @@ -371,7 +387,6 @@ class PreambleInfo(ImmutableRecord): .. attribute:: codegen_state """ - # {{{ main code generation entrypoint def generate_code_v2(kernel): @@ -470,9 +485,30 @@ def generate_code_v2(kernel): kernel.target.host_program_name_prefix + kernel.name + kernel.target.host_program_name_suffix), - schedule_index_end=len(kernel.schedule)) + schedule_index_end=len(kernel.schedule), + is_generating_master_kernel=True) from loopy.codegen.result import generate_host_or_device_program + # {{{ handling auxillary kernels + + auxillary_functions = [] + + for func, aux_knl in kernel.auxillary_kernels.items(): + from loopy.codegen.auxillary_kernels import ( + get_instruction_specific_kernel, + generate_auxillary_kernel_device_code) + for insn in kernel.instructions: + if isinstance(insn, ArrayCallInstruction) and ( + insn.expression.function.name==func): + compliant_knl = get_instruction_specific_kernel( + insn, kernel, aux_knl) + aux_func = generate_auxillary_kernel_device_code(compliant_knl, + kernel.target).device_programs[0].ast + auxillary_functions.append(aux_func) + else: + # TODO: need to check for the other instructions + pass + # }}} codegen_result = generate_host_or_device_program( codegen_state, schedule_index=0) @@ -510,6 +546,16 @@ def generate_code_v2(kernel): # }}} + # TODO: Currently Sticks all the functions into all the kernels, + # need to identify which function goes with which kernel + new_dev_progs = [] + for dev_prog in codegen_result.device_programs: + for func in auxillary_functions: + new_dev_progs.append( + dev_prog.copy(ast=Collection([func, dev_prog.ast]))) + + codegen_result = codegen_result.copy(device_programs=new_dev_progs) + # For faster unpickling in the common case when implemented_domains isn't needed. from loopy.tools import LazilyUnpicklingDict codegen_result = codegen_result.copy( diff --git a/loopy/codegen/auxillary_kernels.py b/loopy/codegen/auxillary_kernels.py new file mode 100644 index 000000000..07c5c7756 --- /dev/null +++ b/loopy/codegen/auxillary_kernels.py @@ -0,0 +1,210 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Kaushik Kulkarni" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +import islpy as isl + +from loopy.codegen import ( + ImplementedDataInfo, + PreambleInfo, + CodeGenerationState) +from loopy.diagnostic import LoopyError +import logging +logger = logging.getLogger(__name__) + + +__doc__ = """ +.. currentmodule:: loopy + +.. autofunction:: generate_auxillary_kernel_device_code + +.. autofunction:: get_instruction_specific_kernel +""" + +# {{{ generating compliant kernel for the given instruction +def get_instruction_specific_kernel(insn, parent_knl, child_knl): + child_knl = child_knl.copy( + name=insn.expression.function.name + "_" + insn.id) + dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) + + # {{{ creating the parent to child parameter association dictionary + + child_arg_to_parent = {} + for child_par, parent_par in zip(child_knl.args, + insn.expression.parameters + insn.assignees): # noqa + child_arg_to_parent[child_par.name] = ( + parent_par.subscr.aggregate.name) + + # }}} + + new_args = [] + for arg in child_knl.args: + child_dim_tag = ( + dim_tags_dict[child_arg_to_parent[arg.name]]) + new_args.append( + arg.copy(dim_tags=child_dim_tag)) + + child_knl = child_knl.copy(args=new_args) + + return child_knl + +# }}} + +# {{{ code generation for the auxillary kernel + + +def generate_auxillary_kernel_device_code(kernel, target): + """ + :returns: a :class:`CodeGenerationResult` + """ + kernel = kernel.copy(target=target) + + from loopy.kernel import kernel_state + if kernel.state == kernel_state.INITIAL: + from loopy.preprocess import preprocess_kernel + kernel = preprocess_kernel(kernel) + + if kernel.schedule is None: + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) + + if kernel.state != kernel_state.SCHEDULED: + raise LoopyError( + "cannot generate code for a kernel that has not been " + "scheduled") + + from loopy.type_inference import infer_unknown_types + kernel = infer_unknown_types(kernel, expect_completion=True) + + from loopy.check import pre_codegen_checks + pre_codegen_checks(kernel) + + logger.info("%s: generate Auxillary Kernel code: start" % kernel.name) + + # {{{ examine arg list + + from loopy.kernel.data import ValueArg + from loopy.kernel.array import ArrayBase + + implemented_data_info = [] + + for arg in kernel.args: + is_written = arg.name in kernel.get_written_variables() + if isinstance(arg, ArrayBase): + implemented_data_info.extend( + arg.decl_info( + kernel.target, + is_written=is_written, + index_dtype=kernel.index_dtype)) + + elif isinstance(arg, ValueArg): + implemented_data_info.append(ImplementedDataInfo( + target=kernel.target, + name=arg.name, + dtype=arg.dtype, + arg_class=ValueArg, + is_written=is_written)) + + else: + raise ValueError("argument type not understood: '%s'" % type(arg)) + + allow_complex = False + for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): + if var.dtype.involves_complex(): + allow_complex = True + + # }}} + + seen_dtypes = set() + seen_functions = set() + seen_atomic_dtypes = set() + + initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions) + codegen_state = CodeGenerationState( + kernel=kernel, + implemented_data_info=implemented_data_info, + implemented_domain=initial_implemented_domain, + implemented_predicates=frozenset(), + seen_dtypes=seen_dtypes, + seen_functions=seen_functions, + seen_atomic_dtypes=seen_atomic_dtypes, + var_subst_map={}, + allow_complex=allow_complex, + var_name_generator=kernel.get_var_name_generator(), + is_generating_device_code=False, + gen_program_name=kernel.name, + schedule_index_end=len(kernel.schedule), + is_generating_master_kernel=False) + + from loopy.codegen.result import generate_host_or_device_program + codegen_result = generate_host_or_device_program( + codegen_state, + schedule_index=0) + + device_code_str = codegen_result.device_code() + + from loopy.check import check_implemented_domains + assert check_implemented_domains( + kernel, codegen_result.implemented_domains, device_code_str) + + # {{{ handle preambles + + for arg in kernel.args: + seen_dtypes.add(arg.dtype) + for tv in six.itervalues(kernel.temporary_variables): + seen_dtypes.add(tv.dtype) + + preambles = kernel.preambles[:] + + preamble_info = PreambleInfo( + kernel=kernel, + seen_dtypes=seen_dtypes, + seen_functions=seen_functions, + # a set of LoopyTypes (!) + seen_atomic_dtypes=seen_atomic_dtypes, + codegen_state=codegen_state + ) + + preamble_generators = kernel.preamble_generators + for prea_gen in preamble_generators: + preambles.extend(prea_gen(preamble_info)) + + codegen_result = codegen_result.copy(device_preambles=preambles) + + # }}} + + # For faster unpickling in the common case when implemented_domains isn't + # needed. + from loopy.tools import LazilyUnpicklingDict + codegen_result = codegen_result.copy( + implemented_domains=LazilyUnpicklingDict( + codegen_result.implemented_domains)) + + logger.info("%s: generate code: done" % kernel.name) + + return codegen_result + +# }}} + +# vim: foldmethod=marker diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 4318ad71c..fcd9e2f6c 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -270,7 +270,6 @@ def wrap_in_if(codegen_state, condition_exprs, inner): # }}} - # {{{ program generation top-level def generate_host_or_device_program(codegen_state, schedule_index): diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 31e0569b9..9767aeaf4 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -400,6 +400,8 @@ class OpenCLCASTBuilder(CASTBuilder): from loopy.target.c import FunctionDeclarationWrapper assert isinstance(fdecl, FunctionDeclarationWrapper) + if not codegen_state.is_generating_master_kernel: + return fdecl fdecl = fdecl.subdecl from cgen.opencl import CLKernel, CLRequiredWorkGroupSize -- GitLab From 703aeea92fb48fc04538129b6a6906098401875b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 05:54:26 -0600 Subject: [PATCH 079/116] adds the functions only to the first kernel --- loopy/codegen/__init__.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 9d7945f3e..97692e0e5 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -34,7 +34,10 @@ from loopy.version import DATA_MODEL_VERSION from cgen import Collection -from loopy.kernel.instruction import ArrayCallInstruction +from loopy.kernel.instruction import ( + ArrayCallInstruction, Assignment, + NoOpInstruction, BarrierInstruction, CallInstruction, + _DataObliviousInstruction) import logging logger = logging.getLogger(__name__) @@ -193,7 +196,7 @@ class CodeGenerationState(object): .. attribute:: schedule_index_end .. attribute:: is_generating_master_kernel - + True of False indication if the code generation is happening for a master kernel or auxillary kernels respectively. """ @@ -206,7 +209,7 @@ class CodeGenerationState(object): is_generating_device_code=None, gen_program_name=None, schedule_index_end=None, - is_generating_master_kernel=True): + is_generating_master_kernel=None): self.kernel = kernel self.implemented_data_info = implemented_data_info self.implemented_domain = implemented_domain @@ -499,15 +502,21 @@ def generate_code_v2(kernel): generate_auxillary_kernel_device_code) for insn in kernel.instructions: if isinstance(insn, ArrayCallInstruction) and ( - insn.expression.function.name==func): + insn.expression.function.name == func): compliant_knl = get_instruction_specific_kernel( insn, kernel, aux_knl) + # TODO: Also need to take input such as allow_complex, + # and preambles from the aux kernels aux_func = generate_auxillary_kernel_device_code(compliant_knl, kernel.target).device_programs[0].ast auxillary_functions.append(aux_func) - else: - # TODO: need to check for the other instructions + elif isinstance(insn, (Assignment, NoOpInstruction, Assignment, + BarrierInstruction, CallInstruction, + _DataObliviousInstruction)): pass + else: + raise NotImplementedError("register_knl not made for %s type of" + "instruciton" % (str(type(insn)))) # }}} codegen_result = generate_host_or_device_program( codegen_state, @@ -546,15 +555,15 @@ def generate_code_v2(kernel): # }}} - # TODO: Currently Sticks all the functions into all the kernels, + # {{{ Pasting the auxillary functions code to the first device program + # TODO: Currently Sticks all the functions only in the first dev_prog, # need to identify which function goes with which kernel - new_dev_progs = [] - for dev_prog in codegen_result.device_programs: - for func in auxillary_functions: - new_dev_progs.append( - dev_prog.copy(ast=Collection([func, dev_prog.ast]))) - - codegen_result = codegen_result.copy(device_programs=new_dev_progs) + dev_prog = codegen_result.device_programs[0] + for func in auxillary_functions: + new_dev_prog = dev_prog.copy(ast=Collection([func, dev_prog.ast])) + new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] + codegen_result = codegen_result.copy(device_programs=new_device_programs) + # }}} # For faster unpickling in the common case when implemented_domains isn't needed. from loopy.tools import LazilyUnpicklingDict -- GitLab From 1adb05e94583f767c676e7ec29d564d8e9e599b3 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 06:14:15 -0600 Subject: [PATCH 080/116] Removed unnecesary function for ArrayCall as the same can be achieved using SubArrayRef --- loopy/symbolic.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index e5b514b20..8ac20c33d 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -712,12 +712,6 @@ class ArrayCall(p.Call): self.function = function self.parameters = parameters - def get_params_as_starting_subs(self): - result = [] - for par in self.parameters: - result.append(par.get_begin_subscript()) - return tuple(result) - mapper_method = intern("map_array_call") -- GitLab From 3dd7508eaa33aa2a5f9ec26d13d7af6c0bdc8bdc Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 06:16:49 -0600 Subject: [PATCH 081/116] Flake8 --- loopy/codegen/__init__.py | 1 + loopy/codegen/auxillary_kernels.py | 7 ++++++ loopy/codegen/result.py | 1 + loopy/transform/register_knl.py | 36 +----------------------------- 4 files changed, 10 insertions(+), 35 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 97692e0e5..c3da5a360 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -392,6 +392,7 @@ class PreambleInfo(ImmutableRecord): # {{{ main code generation entrypoint + def generate_code_v2(kernel): """ :returns: a :class:`CodeGenerationResult` diff --git a/loopy/codegen/auxillary_kernels.py b/loopy/codegen/auxillary_kernels.py index 07c5c7756..25b1fc9f3 100644 --- a/loopy/codegen/auxillary_kernels.py +++ b/loopy/codegen/auxillary_kernels.py @@ -43,7 +43,12 @@ __doc__ = """ """ # {{{ generating compliant kernel for the given instruction + + def get_instruction_specific_kernel(insn, parent_knl, child_knl): + """ Generates the kernel with the arguments strided so that it is compliant + with the given instruction. Returns the new compliant kernel. + """ child_knl = child_knl.copy( name=insn.expression.function.name + "_" + insn.id) dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) @@ -76,6 +81,8 @@ def get_instruction_specific_kernel(insn, parent_knl, child_knl): def generate_auxillary_kernel_device_code(kernel, target): """ + Generates device programs for the given auxillary kernel, with the target + specified by the parent kernel :returns: a :class:`CodeGenerationResult` """ kernel = kernel.copy(target=target) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index fcd9e2f6c..4318ad71c 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -270,6 +270,7 @@ def wrap_in_if(codegen_state, condition_exprs, inner): # }}} + # {{{ program generation top-level def generate_host_or_device_program(codegen_state, schedule_index): diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 538deff40..c1f0e77dd 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -23,11 +23,6 @@ THE SOFTWARE. """ from loopy.kernel import LoopKernel -from loopy.kernel.instruction import ( - ArrayCallInstruction, Assignment, - NoOpInstruction, BarrierInstruction, CallInstruction, - _DataObliviousInstruction) -from loopy.diagnostic import LoopyError __doc__ = """ .. currentmodule:: loopy @@ -35,35 +30,6 @@ __doc__ = """ .. autofunction:: register_callable_kernel """ -# {{{ changing the argument strides - - -def get_strided_child_knl(insn, child_knl, parent_knl): - dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) - - # {{{ creating the parent to child parameter association dictionary - - child_arg_to_parent = {} - for child_par, parent_par in zip(child_knl.args, - insn.expression.parameters + insn.assignees): - child_arg_to_parent[child_par.name] = ( - parent_par.subscr.aggregate.name) - - # }}} - - new_args = [] - for arg in child_knl.args: - child_dim_tag = ( - dim_tags_dict[child_arg_to_parent[arg.name]]) - new_args.append( - arg.copy(dim_tags=child_dim_tag)) - - child_knl = child_knl.copy(args=new_args) - - return child_knl -# }}} - - # {{{ main entrypoint @@ -105,7 +71,7 @@ def register_callable_kernel(parent, function_name, child): # }}} - # FIXME: needs checks whether it is compliant. + # FIXME: needs checks whether the kernels are compliant new_auxillary_kernels = parent.auxillary_kernels new_auxillary_kernels[function_name] = child -- GitLab From 73f4d1e07b23417da995fea79eedbaadf339119e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 06:26:49 -0600 Subject: [PATCH 082/116] SubArrayRef::subscr->subscript --- loopy/codegen/auxillary_kernels.py | 2 +- loopy/kernel/creation.py | 2 +- loopy/kernel/instruction.py | 4 ++-- loopy/symbolic.py | 26 +++++++++++++------------- loopy/type_inference.py | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/loopy/codegen/auxillary_kernels.py b/loopy/codegen/auxillary_kernels.py index 25b1fc9f3..9dfc04343 100644 --- a/loopy/codegen/auxillary_kernels.py +++ b/loopy/codegen/auxillary_kernels.py @@ -59,7 +59,7 @@ def get_instruction_specific_kernel(insn, parent_knl, child_knl): for child_par, parent_par in zip(child_knl.args, insn.expression.parameters + insn.assignees): # noqa child_arg_to_parent[child_par.name] = ( - parent_par.subscr.aggregate.name) + parent_par.subscript.aggregate.name) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 6aefb9083..92138526e 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -503,7 +503,7 @@ def parse_insn(groups, insn_options): elif isinstance(inner_lhs_i, (Subscript, LinearSubscript)): assignee_names.append(inner_lhs_i.aggregate.name) elif isinstance(inner_lhs_i, (SubArrayRef)): - assignee_names.append(inner_lhs_i.subscr.aggregate.name) + assignee_names.append(inner_lhs_i.subscript.aggregate.name) else: raise LoopyError("left hand side of assignment '%s' must " "be variable, subscript or SubArrayRef" % (lhs_i,)) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index d56e24c34..fa8ae592b 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -501,7 +501,7 @@ def _get_assignee_var_name(expr): return agg.name elif isinstance(expr, SubArrayRef): - agg = expr.subscr.aggregate + agg = expr.subscript.aggregate assert isinstance(agg, Variable) return agg.name @@ -1160,7 +1160,7 @@ class ArrayCallInstruction(MultiAssignmentBase): def get_parameters_dim_tag_dict(self, arg_dict): dim_tags_dict = {} for par in (self.assignees + self.expression.parameters): - arg_name = par.subscr.aggregate.name + arg_name = par.subscript.aggregate.name dim_tags_dict[arg_name] = par.get_inner_dim_tags( arg_dict[arg_name].dim_tags) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8ac20c33d..28258f08a 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -111,7 +111,7 @@ class IdentityMapperMixin(object): expr.parameters) def map_sub_array_ref(self, expr, *args): - return SubArrayRef(expr.swept_inames, expr.subscr) + return SubArrayRef(expr.swept_inames, expr.subscript) map_type_cast = map_type_annotation @@ -175,7 +175,7 @@ class WalkMapper(WalkMapperBase): return self.rec(expr.swept_inames, *args) - self.rec(expr.subscr, *args) + self.rec(expr.subscript, *args) def map_array_call(self, expr, *args): if not self.visit(expr): @@ -256,7 +256,7 @@ class StringifyMapper(StringifyMapperBase): def map_sub_array_ref(self, expr, prec): return "SubArrayRef({inames}, ({subscr}))".format( inames=self.rec(expr.swept_inames, prec), - subscr=self.rec(expr.subscr, prec)) + subscr=self.rec(expr.subscript, prec)) class UnidirectionalUnifier(UnidirectionalUnifierBase): @@ -311,7 +311,7 @@ class DependencyMapper(DependencyMapperBase): return set() def map_sub_array_ref(self, expr, *args): - deps = self.rec(expr.subscr, *args) + deps = self.rec(expr.subscript, *args) return deps - set(iname for iname in expr.swept_inames) map_array_call = map_call @@ -638,12 +638,12 @@ class SubArrayRef(p.Expression): These are a tuple of sweeping inames over the array. - .. attribute:: subscr + .. attribute:: subscript The subscript whose adress space is to be referenced """ - init_arg_names = ("swept_inames", "subscr") + init_arg_names = ("swept_inames", "subscript") def __init__(self, _inames, _subscr): # {{{ Sanity Checks @@ -653,16 +653,16 @@ class SubArrayRef(p.Expression): assert isinstance(_subscr, p.Subscript) # }}} self.swept_inames = _inames - self.subscr = _subscr + self.subscript = _subscr def get_begin_subscript(self): starting_inames = [] - for iname in self.subscr.index_tuple: + for iname in self.subscript.index_tuple: if iname in self.swept_inames: starting_inames.append(parse('0')) else: starting_inames.append(iname) - return p.Subscript(self.subscr.aggregate, tuple(starting_inames)) + return p.Subscript(self.subscript.aggregate, tuple(starting_inames)) def get_inner_dim_tags(self, arg_dim_tags): """ Gives the dim tags for the inner inames. @@ -672,21 +672,21 @@ class SubArrayRef(p.Expression): """ from loopy.kernel.array import FixedStrideArrayDimTag as DimTag inner_dim_tags = [] - for dim_tag, iname in zip(arg_dim_tags, self.subscr.index_tuple): + for dim_tag, iname in zip(arg_dim_tags, self.subscript.index_tuple): if iname in self.swept_inames: inner_dim_tags.append(DimTag(dim_tag.stride)) return inner_dim_tags def __getinitargs__(self): - return (self.swept_inames, self.subscr) + return (self.swept_inames, self.subscript) def get_hash(self): - return hash((self.__class__, self.swept_inames, self.subscr)) + return hash((self.__class__, self.swept_inames, self.subscript)) def is_equal(self, other): return (other.__class__ == self.__class__ - and other.subscr == self.subscr + and other.subscript == self.subscript and other.swept_inames == self.swept_inames) def stringifier(self): diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 3d44b4d74..32fefed7b 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -400,7 +400,7 @@ class TypeInferenceMapper(CombineMapper): for rec_result in rec_results] def map_sub_array_ref(self, expr): - return self.rec(expr.subscr) + return self.rec(expr.subscript) map_array_call = map_call -- GitLab From 1cbbefea5a71cc2bc25b3e89654a91b9de3b2d85 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 06:30:59 -0600 Subject: [PATCH 083/116] adds pytest_cache to .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a7280e374..17581411b 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,6 @@ htmlcov .ipynb_checkpoints lextab.py yacctab.py -.pytest_cache +.pytest_cache/ .cache -- GitLab From cc294a2ba2f55b405060fa668584703b95809e78 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 06:44:25 -0600 Subject: [PATCH 084/116] Tries a fix for failing tests --- loopy/codegen/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index c3da5a360..a7fba81ae 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -559,9 +559,10 @@ def generate_code_v2(kernel): # {{{ Pasting the auxillary functions code to the first device program # TODO: Currently Sticks all the functions only in the first dev_prog, # need to identify which function goes with which kernel - dev_prog = codegen_result.device_programs[0] + new_dev_prog = codegen_result.device_programs[0] for func in auxillary_functions: - new_dev_prog = dev_prog.copy(ast=Collection([func, dev_prog.ast])) + new_dev_prog = new_dev_prog.copy( + ast=Collection([func, new_dev_prog.ast])) new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] codegen_result = codegen_result.copy(device_programs=new_device_programs) # }}} -- GitLab From a760eb8412c1354688f1054bb3ce7ca91fbc3c15 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 11:14:36 -0600 Subject: [PATCH 085/116] [ci skip] removing the pytest cache --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 17581411b..b0668bd73 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,6 @@ htmlcov .ipynb_checkpoints lextab.py yacctab.py -.pytest_cache/ +.pytest_cache/* .cache -- GitLab From f6058fe7322bc8d6b488d7d49bbc0f75eb51c972 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Feb 2018 11:17:37 -0600 Subject: [PATCH 086/116] [ci skip] removes pytest_cache --- .pytest_cache/v/cache/lastfailed | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .pytest_cache/v/cache/lastfailed diff --git a/.pytest_cache/v/cache/lastfailed b/.pytest_cache/v/cache/lastfailed deleted file mode 100644 index 9e26dfeeb..000000000 --- a/.pytest_cache/v/cache/lastfailed +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file -- GitLab From 3fbf0c9bc2d0ef03fbb143b2a65a7b3082089ef2 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 1 Mar 2018 18:32:46 -0600 Subject: [PATCH 087/116] Added guessing var shaper for SubArrayRef --- loopy/symbolic.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 28258f08a..bd669ac59 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -251,7 +251,7 @@ class StringifyMapper(StringifyMapperBase): return "cast(%s, %s)" % (repr(expr.type), self.rec(expr.child, PREC_NONE)) def map_array_call(self, expr, prec): - return "Array%s" % self.rec(expr, prec) + return self.map_call(expr, prec) def map_sub_array_ref(self, expr, prec): return "SubArrayRef({inames}, ({subscr}))".format( @@ -647,6 +647,9 @@ class SubArrayRef(p.Expression): def __init__(self, _inames, _subscr): # {{{ Sanity Checks + if not isinstance(_inames, tuple): + assert isinstance(_inames, p.Variable) + _inames = (_inames,) assert isinstance(_inames, tuple) for iname in _inames: assert isinstance(iname, p.Variable) @@ -712,6 +715,9 @@ class ArrayCall(p.Call): self.function = function self.parameters = parameters + def stringifier(self): + return StringifyMapper + mapper_method = intern("map_array_call") @@ -1771,6 +1777,10 @@ class BatchedAccessRangeMapper(WalkMapper): def map_type_cast(self, expr, inames): return self.rec(expr.child, inames) + def map_sub_array_ref(self, expr, inames): + total_inames = inames | set([iname.name for iname in expr.swept_inames]) + return self.rec(expr.subscript, total_inames) + class AccessRangeMapper(object): -- GitLab From 8f7081478492000b09acc3f541d72a5ba794b0c8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 2 Mar 2018 00:42:29 -0600 Subject: [PATCH 088/116] Added checks while registering a kernel --- loopy/codegen/auxillary_kernels.py | 109 +++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 7 deletions(-) diff --git a/loopy/codegen/auxillary_kernels.py b/loopy/codegen/auxillary_kernels.py index 9dfc04343..ee43e015e 100644 --- a/loopy/codegen/auxillary_kernels.py +++ b/loopy/codegen/auxillary_kernels.py @@ -30,6 +30,12 @@ from loopy.codegen import ( PreambleInfo, CodeGenerationState) from loopy.diagnostic import LoopyError +from loopy.kernel.instruction import ( + ArrayCallInstruction, Assignment, + NoOpInstruction, BarrierInstruction, CallInstruction, + _DataObliviousInstruction) +from cgen import Collection + import logging logger = logging.getLogger(__name__) @@ -45,6 +51,54 @@ __doc__ = """ # {{{ generating compliant kernel for the given instruction +def check_compliance(insn, parent_knl, child_knl, child_to_parent): + + # {{{ getting the parent to child mapping + + parent_to_child = {} + for child, parent in child_to_parent.items(): + parent_to_child[parent] = child + + # }}} + + # {{{ dtype compliance + for arg in child_knl.args: + name_in_parent = child_to_parent[arg.name] + parent_arg = parent_knl.arg_dict[name_in_parent] + if arg.dtype is not None: + assert arg.dtype == parent_arg.dtype, ("While registering kernel the" + "dtypes of variables don't match") + assert arg.stride is None, ("The stride of the child kernel would be written" + "according to the requirement of the parent kernel -- do no set the" + "strides of the child kernel") + # }}} + + """ + # Disabling for now, till I have a function for finding the swept region + # {{{ axes used by the swept_inames + + parent_parameters = insn.expression.parameters + parent_assignees = insn.exression.assignees + for par in parent_parameters + parent_assignees: + inames = par.swept_inames + child_arg = child_knl.arg_dict[parent_to_child[par.name]] + + # check to ensure the equality of number of axes around both the + # kernels + assert len(child_arg.shape) == len(inames), ("regsiter_knl: The ") + + parent_swept_region = par.swept_region() + child_swept_region = child_arg.shape + + for parent_swept, child_swept in zip(parent_swept_region, + child_swept_region): + assert parent_swept == child_swept, ("regsiter_kernel: send only the" + "part of the array you intend to write to the child kernel") + """ + + # }}} + + def get_instruction_specific_kernel(insn, parent_knl, child_knl): """ Generates the kernel with the arguments strided so that it is compliant with the given instruction. Returns the new compliant kernel. @@ -57,18 +111,21 @@ def get_instruction_specific_kernel(insn, parent_knl, child_knl): child_arg_to_parent = {} for child_par, parent_par in zip(child_knl.args, - insn.expression.parameters + insn.assignees): # noqa + insn.expression.parameters + insn.assignees): child_arg_to_parent[child_par.name] = ( parent_par.subscript.aggregate.name) # }}} + check_compliance(insn, parent_knl, child_knl, child_arg_to_parent) + new_args = [] for arg in child_knl.args: - child_dim_tag = ( - dim_tags_dict[child_arg_to_parent[arg.name]]) - new_args.append( - arg.copy(dim_tags=child_dim_tag)) + name_in_parent = child_arg_to_parent[arg.name] + parent_arg = parent_knl.arg_dict[name_in_parent] + child_dim_tag = dim_tags_dict[name_in_parent] + + new_args.append(arg.copy(dim_tags=child_dim_tag, dtype=parent_arg.dtype)) child_knl = child_knl.copy(args=new_args) @@ -165,6 +222,35 @@ def generate_auxillary_kernel_device_code(kernel, target): is_generating_master_kernel=False) from loopy.codegen.result import generate_host_or_device_program + + # {{{ handling auxillary kernels + + auxillary_functions = [] + + for func, aux_knl in kernel.auxillary_kernels.items(): + from loopy.codegen.auxillary_kernels import ( + get_instruction_specific_kernel, + generate_auxillary_kernel_device_code) + for insn in kernel.instructions: + if isinstance(insn, ArrayCallInstruction): + if insn.expression.function.name == func: + compliant_knl = get_instruction_specific_kernel( + insn, kernel, aux_knl) + # TODO: Also need to take input such as allow_complex, + # and preambles from the aux kernels + aux_func = generate_auxillary_kernel_device_code( + compliant_knl, + kernel.target).device_programs[0].ast # noqa + auxillary_functions.append(aux_func) + elif isinstance(insn, (Assignment, NoOpInstruction, Assignment, + BarrierInstruction, CallInstruction, + _DataObliviousInstruction)): + pass + else: + raise NotImplementedError( + "register_knl not made for %s type of" + "instruciton" % (str(type(insn)))) + # }}} codegen_result = generate_host_or_device_program( codegen_state, schedule_index=0) @@ -201,8 +287,17 @@ def generate_auxillary_kernel_device_code(kernel, target): # }}} - # For faster unpickling in the common case when implemented_domains isn't - # needed. + # {{{ Pasting the auxillary functions code to the first device program + # TODO: Currently Sticks all the functions only in the first dev_prog, + # need to identify which function goes with which kernel + new_dev_prog = codegen_result.device_programs[0] + for func in auxillary_functions: + new_dev_prog = new_dev_prog.copy( + ast=Collection([func, new_dev_prog.ast])) + new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] + codegen_result = codegen_result.copy(device_programs=new_device_programs) + # }}} + # For faster unpickling in the common case when implemented_domains isn't needed. from loopy.tools import LazilyUnpicklingDict codegen_result = codegen_result.copy( implemented_domains=LazilyUnpicklingDict( -- GitLab From 0ef2feb407ca86df99d7459a4a0381b6d7627edc Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 2 Mar 2018 00:50:18 -0600 Subject: [PATCH 089/116] Removed the strides check in auxillary kernls --- loopy/codegen/auxillary_kernels.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/loopy/codegen/auxillary_kernels.py b/loopy/codegen/auxillary_kernels.py index ee43e015e..3f7a0ff0f 100644 --- a/loopy/codegen/auxillary_kernels.py +++ b/loopy/codegen/auxillary_kernels.py @@ -68,9 +68,6 @@ def check_compliance(insn, parent_knl, child_knl, child_to_parent): if arg.dtype is not None: assert arg.dtype == parent_arg.dtype, ("While registering kernel the" "dtypes of variables don't match") - assert arg.stride is None, ("The stride of the child kernel would be written" - "according to the requirement of the parent kernel -- do no set the" - "strides of the child kernel") # }}} """ -- GitLab From fd6ee48474459dc2b3247ab3a24e8de39a6bdb54 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 2 Mar 2018 00:52:05 -0600 Subject: [PATCH 090/116] Removed small error in if-else statement --- loopy/__init__.py | 5 +++-- loopy/codegen/__init__.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 9c39a38be..100c339ad 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -40,7 +40,8 @@ from loopy.kernel.instruction import ( memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, InstructionBase, MultiAssignmentBase, Assignment, ExpressionInstruction, - CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction) + CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction, + ArrayCallInstruction) from loopy.kernel.data import ( auto, KernelArgument, @@ -158,7 +159,7 @@ __all__ = [ "InstructionBase", "MultiAssignmentBase", "Assignment", "ExpressionInstruction", "CallInstruction", "CInstruction", "NoOpInstruction", - "BarrierInstruction", + "BarrierInstruction", "ArrayCallInstruction", "KernelArgument", "ValueArg", "GlobalArg", "ConstantArg", "ImageArg", diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index a7fba81ae..108b70a33 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -502,15 +502,15 @@ def generate_code_v2(kernel): get_instruction_specific_kernel, generate_auxillary_kernel_device_code) for insn in kernel.instructions: - if isinstance(insn, ArrayCallInstruction) and ( - insn.expression.function.name == func): - compliant_knl = get_instruction_specific_kernel( - insn, kernel, aux_knl) - # TODO: Also need to take input such as allow_complex, - # and preambles from the aux kernels - aux_func = generate_auxillary_kernel_device_code(compliant_knl, - kernel.target).device_programs[0].ast - auxillary_functions.append(aux_func) + if isinstance(insn, ArrayCallInstruction): + if insn.expression.function.name == func: + compliant_knl = get_instruction_specific_kernel( + insn, kernel, aux_knl) + # TODO: Also need to take input such as allow_complex, + # and preambles from the aux kernels + aux_func = generate_auxillary_kernel_device_code(compliant_knl, + kernel.target).device_programs[0].ast + auxillary_functions.append(aux_func) elif isinstance(insn, (Assignment, NoOpInstruction, Assignment, BarrierInstruction, CallInstruction, _DataObliviousInstruction)): -- GitLab From aa940200c45827f9e9527aa650bd86b24ab37183 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 2 Mar 2018 14:12:22 -0600 Subject: [PATCH 091/116] [ci skip] Start to function_type? --- loopy/kernel/function_type.py | 140 ++++++++++++++++++++++++++++++++++ loopy/kernel/preamble.py | 74 ++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 loopy/kernel/function_type.py create mode 100644 loopy/kernel/preamble.py diff --git a/loopy/kernel/function_type.py b/loopy/kernel/function_type.py new file mode 100644 index 000000000..cd78bdf28 --- /dev/null +++ b/loopy/kernel/function_type.py @@ -0,0 +1,140 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from pytools import ImmutableRecord +from loopy.diagnostic import LoopyError +from loopy.types import LoopyType +from loopy.kernel.preamble import EmptyPreamble + + +class FunctionTypeBase(ImmutableRecord): + """ A base class for all function declaration types that can occur in a + kernel + + .. attribute:: name + + A :class:`str` which tells the name which idenitifies the function. + + .. attribute:: result_dtypes + + A :class:`tuple` of data types which tell the data types of the output + written by the function. + + .. attribute:: arg_dtypes + + A :class:`tuple` of data types which tell the data types of the + parameters taken in by the function. This might not always be the input + as in the case of function "pass by reference" the parameters as well + as the result are the arguments to the function. + + .. attribute:: target + + A :class:`TargetBase` which tells which is the target for which the + function is defined over. + + .. automethod:: __init__ + """ + + fields = set() + + def __init__(self, name, arg_dtypes, result_dtypes, target, + preamble): + self.name = None + self.arg_dtypes = None + self.result_dtypes = None + self.target = None + self.pass_by_value = None + self.preamble = None + # {{{ sanity checks + + for type in result_dtypes+arg_dtypes: + assert(type, LoopyType) + + # }}} + super(FunctionTypeBase).__init__( + self, + name=name, + arg_dtypes=arg_dtypes, + result_dtypes=result_dtypes, + target=target, + preamble=preamble) + + def get_preambles(self): + if self.preamble is None: + raise LoopyError("Preambles not implemented for %s function type" % ( + type(self))) + + +class PassByValueFunctionType(FunctionTypeBase): + def __init__(self, name, arg_dtypes, result_dtype, target, preamble): + super(PassByReferenceFunctionType).__init__(self, + name=name, + arg_dtypes=arg_dtypes, + result_dtypes=(result_dtype,), + target=target, + pass_by_value=True) + + +class PassByReferenceFunctionType(FunctionTypeBase): + def __init__(self, name, arg_dtypes, result_dtypes, target): + super(PassByReferenceFunctionType).__init__(self, + name=name, + arg_dtypes=arg_dtypes, + result_dtypes=result_dtypes, + target=target, + pass_by_value=False) + def get_preamble(self): + return EmptyPreamble() + pass + + +class MathFunctionType(PassByValueFunctionType): + """ These are the mathematical function which are usually provided by the + target itself. Eg. sin, cos, abs + """ + def __init__(self, name, arg_dtypes, result_dtype, target): + super(MathFunctionType).__init__(self, + name=name, + arg_dtypes=arg_dtypes, + result_dtype=result_dtype, + target=target) + pass + + +class SubstitutionFunctionType(PassByValueFunctionType): + # no idea about this + # do we even need this over here? + def __init__(self): + pass + + +class MultiAssignmentFunctionType(PassByReferenceFunctionType): + def __init__(self, name, arg_dtypes, result_dtypes, target): + super + pass + + +class KernelCallFunctionType(MultiAssignmentFunctionType): + """ This is the kernel mangler which is to be fed to the master kernel. + """ + def __init__(self, kernel): + pass diff --git a/loopy/kernel/preamble.py b/loopy/kernel/preamble.py new file mode 100644 index 000000000..9109ac416 --- /dev/null +++ b/loopy/kernel/preamble.py @@ -0,0 +1,74 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from pytools import ImmutableRecord + + + + +class PreambleGenerator(ImmutableRecord): + """ A class which is intended to collect all the preambles. This would then + contain the function `generate()`, which would in the end geenrate the final + preamble string. + """ + + def __init__(self, target, premables): + pass + + def generate(self): + pass + + +class PreambleBase(ImmutableRecord): + """ A base class for all the preambles, enountered in Loopy. + """ + def __init__(self): + pass + + +class EmptyPreamble(PreambleBase): + """ This is just the Empty preamble which can be passed by a function whenever it + does need any preambles. For example for some function like `sin` in an OpenCL + code, we do not need any preambles + """ + def __init__(self): + super(EmptyPreamble).__init__(self, + target=None, + preamble=None) + + def generate(self): + return "" + + +class TargetPreamble(PreambleBase): + def __init__(self): + pass + + + +class FunctionPreamble(PreambleBase): + """ This is the preamble for functions. For example while generating a C++ code, + in order to invoke the math function we need to add `#include ` to the + preamble. Such sets of preamble fall in the set of math preambles + """ + def __init__(self, target, preamble_string): + pass -- GitLab From 7958168ddde03c07bf8d822230cece53c1a8d468 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 3 Mar 2018 19:58:06 -0600 Subject: [PATCH 092/116] func-interface structure by Andreas Sir. --- loopy/kernel/func-interface.py | 122 +++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 loopy/kernel/func-interface.py diff --git a/loopy/kernel/func-interface.py b/loopy/kernel/func-interface.py new file mode 100644 index 000000000..d83ed506b --- /dev/null +++ b/loopy/kernel/func-interface.py @@ -0,0 +1,122 @@ +form pytools import ImmutableRecord + + +class ValueArgDescriptor: + """ + .. attribute dtype + """ + pass + + +class ArrayArgDescriptor: + """ + .. attribute:: mem_scope + .. attribute:: shape + .. attribute:: dim_tags + """ + # why is dtype missing over here? + + +class InKernelCallable(ImmutableRecord): + """ + + .. attribute:: name + + """ + + def __init__(self, name): + self.name = name + + super(InKernelCallable).__init__(name=name) + + def copy(self, name=None): + if name is None: + name = self.name + + return InKernelCallable(name=name) + + def with_types(self, arg_id_to_type): + """ + :arg arg_id_to_type: a mapping from argument identifiers + (integers for positional arguments, names for keyword + arguments) to :class:`loopy.types.LoopyType` instances. + Unspecified/unknown types are not represented in *arg_id_to_type*. + + Return values are denoted by negative integers, with the + first returned value identified as *-1*. + + :returns: a tuple ``(new_self, arg_id_to_type)``, where *new_self* is a + new :class:`InKernelCallable` specialized for the given types, + and *arg_id_to_type* is a mapping of the same form as the + argument above, however it may have more information present. + Any argument information exists both by its positional and + its keyword identifier. + """ + + def with_shapes_and_dim_tags(self, arg_id_to_arg_descr): + """ + :arg arg_id_to_type: a mapping from argument identifiers + (integers for positional arguments, names for keyword + arguments) to :class:`loopy.ArrayArgDescriptor` instances. + Unspecified/unknown types are not represented in *arg_id_to_type*. + + Return values are denoted by negative integers, with the + first returned value identified as *-1*. + + :returns: a tuple ``(new_self, arg_id_to_type)``, where *new_self* is a + new :class:`InKernelCallable` specialized for the given types, + and *arg_id_to_type* is a mapping of the same form as the + argument above, however it may have more information present. + Any argument information exists both by its positional and + its keyword identifier. + """ + + def with_iname_tag_usage(self, unusable, concurrent_shape): + """ + :arg unusable: a set of iname tags that may not be used in the callee. + :arg concurrent_shape: an list of tuples ``(iname_tag, bound)`` for + concurrent inames that are used in the calller but also available + for mapping by the callee. *bound* is given as a + :class:`islpy.PwAff`. + + :returns: a list of the same type as *concurrent*, potentially modified + by increasing bounds or adding further iname tag entries. + + All iname tags not explicitly listed in *concurrent* or *unusable* are + available for mapping by the callee. + """ + + def is_arg_written(self, arg_id): + """ + :arg arg_id: (keyword) name or position + """ + pass + + def is_ready_for_code_gen(self): + pass + + # {{{ code generation + + def generate_preambles(self, target): + pass + + def emit_call(self, target): + pass + + # }}} + + def __eq__(self, other): + pass + + def __hash__(self, other): + pass + + +class CallableKernel(InKernelCallable): + """ + .. attribute:: subkernel + + """ + + def copy(self, name=None, subkernel=None): + pass -- GitLab From 4caf8d10c009b5e3312588e471e95e1d5f8d536f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 4 Mar 2018 11:03:16 -0600 Subject: [PATCH 093/116] [ci skip] changing workflow to personal laptop --- loopy/kernel/data.py | 31 -------- loopy/kernel/func-interface.py | 107 +++++++++++++++++++++++-- loopy/kernel/function_type.py | 140 --------------------------------- loopy/kernel/instruction.py | 1 + 4 files changed, 101 insertions(+), 178 deletions(-) delete mode 100644 loopy/kernel/function_type.py diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index c90e8a64b..d09119b9b 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -603,35 +603,4 @@ class SubstitutionRule(ImmutableRecord): # }}} - -# {{{ function call mangling - -class CallMangleInfo(ImmutableRecord): - """ - .. attribute:: target_name - - A string. The name of the function to be called in the - generated target code. - - .. attribute:: result_dtypes - - A tuple of :class:`LoopyType` instances indicating what - types of values the function returns. - - .. attribute:: arg_dtypes - - A tuple of :class:`LoopyType` instances indicating what - types of arguments the function actually receives. - """ - - def __init__(self, target_name, result_dtypes, arg_dtypes): - assert isinstance(result_dtypes, tuple) - - super(CallMangleInfo, self).__init__( - target_name=target_name, - result_dtypes=result_dtypes, - arg_dtypes=arg_dtypes) - -# }}} - # vim: foldmethod=marker diff --git a/loopy/kernel/func-interface.py b/loopy/kernel/func-interface.py index d83ed506b..5d24c6bd5 100644 --- a/loopy/kernel/func-interface.py +++ b/loopy/kernel/func-interface.py @@ -1,4 +1,6 @@ -form pytools import ImmutableRecord +from pytools import ImmutableRecord +from loopy.diagnostic import LoopyError +from loopy.types import LoopyType class ValueArgDescriptor: @@ -14,7 +16,6 @@ class ArrayArgDescriptor: .. attribute:: shape .. attribute:: dim_tags """ - # why is dtype missing over here? class InKernelCallable(ImmutableRecord): @@ -22,10 +23,48 @@ class InKernelCallable(ImmutableRecord): .. attribute:: name + The name of the callable which can be encountered within a kernel. + + .. attrbute:: result_dtype + + The dtypes of the intended result_dtypes + + .. attrbute:: arg_dypes + + The dtypes of the intended arg_dtypes + + .. note:: + + How do I generate an association from arg_id keyword to position. I am pretty + sure it has to be an input attribute for this one. + """ - def __init__(self, name): + def __init__(self, name, arg_dtypes, result_dtypes): + + # {{{ sanity checks + + if not isinstance(name, str): + raise LoopyError("name of a InKernelCallable should be a string") + + if not isinstance(arg_dtypes, tuple): + raise LoopyError("arg_dtypes of a InKernelCallable should be a tuple") + + if not isinstance(result_dtypes, tuple): + raise LoopyError("result_dtypes of a InKernelCallable should be a tuple") + + for arg in arg_dtypes: + if not isinstance(arg, LoopyType): + raise LoopyError("arg_dtypes must have entries of type LoopyType") + for res in result_dtypes: + if not isinstance(res, LoopyType): + raise LoopyError("result_dtypes must have entries of type LoopyType") + + # }}} + self.name = name + self.arg_dtypes = arg_dtypes + self.result_dtypes = result_dtypes super(InKernelCallable).__init__(name=name) @@ -53,12 +92,14 @@ class InKernelCallable(ImmutableRecord): its keyword identifier. """ + raise NotImplementedError() + def with_shapes_and_dim_tags(self, arg_id_to_arg_descr): """ - :arg arg_id_to_type: a mapping from argument identifiers + :arg arg_id_to_arg_descr: a mapping from argument identifiers (integers for positional arguments, names for keyword arguments) to :class:`loopy.ArrayArgDescriptor` instances. - Unspecified/unknown types are not represented in *arg_id_to_type*. + Unspecified/unknown types are not represented in *arg_id_to_descr*. Return values are denoted by negative integers, with the first returned value identified as *-1*. @@ -71,6 +112,8 @@ class InKernelCallable(ImmutableRecord): its keyword identifier. """ + raise NotImplementedError() + def with_iname_tag_usage(self, unusable, concurrent_shape): """ :arg unusable: a set of iname tags that may not be used in the callee. @@ -86,6 +129,8 @@ class InKernelCallable(ImmutableRecord): available for mapping by the callee. """ + raise NotImplementedError() + def is_arg_written(self, arg_id): """ :arg arg_id: (keyword) name or position @@ -93,7 +138,7 @@ class InKernelCallable(ImmutableRecord): pass def is_ready_for_code_gen(self): - pass + return False # {{{ code generation @@ -112,6 +157,45 @@ class InKernelCallable(ImmutableRecord): pass +class CallableMath(InKernelCallable): + """ A class to contain all the standard math library functions like sin, cos, + abs, etu + """ + + def __init__(self, name, result_dtype, arg_dtypes): + super(CallableMath).__init__(self, + name=name, + result_dtypes=(result_dtype,), + arg_dtypes=arg_dtypes) + + def is_arg_written(self, arg_id): + """ + :arg arg_id: (keyword) name or position + """ + if arg_id < 0: + return True + else: + return False + + # {{{ codegen + + def is_ready_for_code_gen(self): + return True + + def generate_preambles(self, target): + pass + + def emit_call(self, target): + pass + + # }}} + + +class CallableReduction(InKernelCallable): + """ Contains information about the functions which are encountered as reductions. + """ + + class CallableKernel(InKernelCallable): """ .. attribute:: subkernel @@ -119,4 +203,13 @@ class CallableKernel(InKernelCallable): """ def copy(self, name=None, subkernel=None): - pass + if name is None: + name = self.name + + if subkernel is None: + subkernel = self.subkernel + + return CallableKernel(name=name, + subkernel=subkernel) + +# vim: foldmethod=marker diff --git a/loopy/kernel/function_type.py b/loopy/kernel/function_type.py deleted file mode 100644 index cd78bdf28..000000000 --- a/loopy/kernel/function_type.py +++ /dev/null @@ -1,140 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2018 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -from pytools import ImmutableRecord -from loopy.diagnostic import LoopyError -from loopy.types import LoopyType -from loopy.kernel.preamble import EmptyPreamble - - -class FunctionTypeBase(ImmutableRecord): - """ A base class for all function declaration types that can occur in a - kernel - - .. attribute:: name - - A :class:`str` which tells the name which idenitifies the function. - - .. attribute:: result_dtypes - - A :class:`tuple` of data types which tell the data types of the output - written by the function. - - .. attribute:: arg_dtypes - - A :class:`tuple` of data types which tell the data types of the - parameters taken in by the function. This might not always be the input - as in the case of function "pass by reference" the parameters as well - as the result are the arguments to the function. - - .. attribute:: target - - A :class:`TargetBase` which tells which is the target for which the - function is defined over. - - .. automethod:: __init__ - """ - - fields = set() - - def __init__(self, name, arg_dtypes, result_dtypes, target, - preamble): - self.name = None - self.arg_dtypes = None - self.result_dtypes = None - self.target = None - self.pass_by_value = None - self.preamble = None - # {{{ sanity checks - - for type in result_dtypes+arg_dtypes: - assert(type, LoopyType) - - # }}} - super(FunctionTypeBase).__init__( - self, - name=name, - arg_dtypes=arg_dtypes, - result_dtypes=result_dtypes, - target=target, - preamble=preamble) - - def get_preambles(self): - if self.preamble is None: - raise LoopyError("Preambles not implemented for %s function type" % ( - type(self))) - - -class PassByValueFunctionType(FunctionTypeBase): - def __init__(self, name, arg_dtypes, result_dtype, target, preamble): - super(PassByReferenceFunctionType).__init__(self, - name=name, - arg_dtypes=arg_dtypes, - result_dtypes=(result_dtype,), - target=target, - pass_by_value=True) - - -class PassByReferenceFunctionType(FunctionTypeBase): - def __init__(self, name, arg_dtypes, result_dtypes, target): - super(PassByReferenceFunctionType).__init__(self, - name=name, - arg_dtypes=arg_dtypes, - result_dtypes=result_dtypes, - target=target, - pass_by_value=False) - def get_preamble(self): - return EmptyPreamble() - pass - - -class MathFunctionType(PassByValueFunctionType): - """ These are the mathematical function which are usually provided by the - target itself. Eg. sin, cos, abs - """ - def __init__(self, name, arg_dtypes, result_dtype, target): - super(MathFunctionType).__init__(self, - name=name, - arg_dtypes=arg_dtypes, - result_dtype=result_dtype, - target=target) - pass - - -class SubstitutionFunctionType(PassByValueFunctionType): - # no idea about this - # do we even need this over here? - def __init__(self): - pass - - -class MultiAssignmentFunctionType(PassByReferenceFunctionType): - def __init__(self, name, arg_dtypes, result_dtypes, target): - super - pass - - -class KernelCallFunctionType(MultiAssignmentFunctionType): - """ This is the kernel mangler which is to be fed to the master kernel. - """ - def __init__(self, kernel): - pass diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index fa8ae592b..8b79fbe27 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1129,6 +1129,7 @@ class ArrayCallInstruction(MultiAssignmentBase): # FIXME: Currently assumes that all the assignees share the same inames # FIXME: Change this change this CHANGE THIS... + # This should be the union of all such happenings self.within_inames = frozenset(set(self.assignee_subscript_deps()[0])) if temp_var_types is None: -- GitLab From a9100da9086d6b1b5f6877176fb5e6ff13e7954b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 4 Mar 2018 19:14:37 -0600 Subject: [PATCH 094/116] Reverted the deletion of CallMangleInfo --- loopy/kernel/data.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index d09119b9b..a62ab0574 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -603,4 +603,30 @@ class SubstitutionRule(ImmutableRecord): # }}} + +# {{{ function call mangling + +class CallMangleInfo(ImmutableRecord): + """ + .. attribute:: target_name + A string. The name of the function to be called in the + generated target code. + .. attribute:: result_dtypes + A tuple of :class:`LoopyType` instances indicating what + types of values the function returns. + .. attribute:: arg_dtypes + A tuple of :class:`LoopyType` instances indicating what + types of arguments the function actually receives. + """ + + def __init__(self, target_name, result_dtypes, arg_dtypes): + assert isinstance(result_dtypes, tuple) + + super(CallMangleInfo, self).__init__( + target_name=target_name, + result_dtypes=result_dtypes, + arg_dtypes=arg_dtypes) + +# }}} + # vim: foldmethod=marker -- GitLab From 9c2c98b600e0cec42dc20b8bed955e4779dbd115 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 4 Mar 2018 22:50:31 -0600 Subject: [PATCH 095/116] Wrapping up the definition of function_interface.py --- ...unc-interface.py => function_interface.py} | 161 +++++++++++------- 1 file changed, 95 insertions(+), 66 deletions(-) rename loopy/kernel/{func-interface.py => function_interface.py} (52%) diff --git a/loopy/kernel/func-interface.py b/loopy/kernel/function_interface.py similarity index 52% rename from loopy/kernel/func-interface.py rename to loopy/kernel/function_interface.py index 5d24c6bd5..2fa6f824d 100644 --- a/loopy/kernel/func-interface.py +++ b/loopy/kernel/function_interface.py @@ -1,22 +1,60 @@ from pytools import ImmutableRecord from loopy.diagnostic import LoopyError -from loopy.types import LoopyType +from collections.abc import Mapping -class ValueArgDescriptor: + +# {{{ argument descriptors + +class ArgDescriptor(ImmutableRecord): + """Base type of argument description about the variable type that is supposed to + be encountered in a function signature. + .. attribute:: dtype + .. attribute:: mem_scope + .. attribute:: shape + .. attribute:: dim_tags + """ + + def __init__(self, + dtype=None, + mem_scope=None, + shape=None, + dim_tags=None): + super(ArgDescriptor).__init__(self, + dtype=dtype, + shape=shape, + dim_tags=dim_tags) + + +class ValueArgDescriptor(ArgDescriptor): """ .. attribute dtype """ - pass + def __init__(self, dtype): + super(ValueArgDescriptor).__init__(self, + dtype=dtype) -class ArrayArgDescriptor: +class ArrayArgDescriptor(ArgDescriptor): """ + .. attribute:: dtype .. attribute:: mem_scope .. attribute:: shape .. attribute:: dim_tags """ + def __init__(self, + dtype=None, + mem_scope=None, + shape=None, + dim_tags=None): + super(ArgDescriptor).__init__(self, + dtype=dtype, + shape=shape, + dim_tags=dim_tags) + +# }}} + class InKernelCallable(ImmutableRecord): """ @@ -25,48 +63,65 @@ class InKernelCallable(ImmutableRecord): The name of the callable which can be encountered within a kernel. - .. attrbute:: result_dtype + .. attrbute:: arg_id_to_descr - The dtypes of the intended result_dtypes + A mapping from the id to dtypes of the argument - .. attrbute:: arg_dypes + .. attribute:: arg_id_to_keyword - The dtypes of the intended arg_dtypes + A mapping from the id to the keyword of the argument. .. note:: - How do I generate an association from arg_id keyword to position. I am pretty - sure it has to be an input attribute for this one. + Negative ids in the mapping attributes indicate the result arguments """ - def __init__(self, name, arg_dtypes, result_dtypes): + def __init__(self, name, arg_id_to_descr, arg_id_to_keyword): # {{{ sanity checks if not isinstance(name, str): raise LoopyError("name of a InKernelCallable should be a string") - if not isinstance(arg_dtypes, tuple): - raise LoopyError("arg_dtypes of a InKernelCallable should be a tuple") + if not isinstance(arg_id_to_descr, Mapping): + raise LoopyError("arg_id_to_dtype of a InKernelCallable should be a" + "mapping") - if not isinstance(result_dtypes, tuple): - raise LoopyError("result_dtypes of a InKernelCallable should be a tuple") + if not isinstance(arg_id_to_keyword, Mapping): + raise LoopyError("arg_id_to_keyword of a InKernelCallable should be a" + "mapping") - for arg in arg_dtypes: - if not isinstance(arg, LoopyType): + for arg_id, dtype in arg_id_to_descr.items(): + if not isinstance(dtype, ArgDescriptor): raise LoopyError("arg_dtypes must have entries of type LoopyType") - for res in result_dtypes: - if not isinstance(res, LoopyType): - raise LoopyError("result_dtypes must have entries of type LoopyType") + + for arg_id, keyword in arg_id_to_keyword.items(): + if not isinstance(dtype, str): + raise LoopyError("keyword must be a string") + + assert len(arg_id_to_descr) == len(arg_id_to_keyword), ("Length of the" + "keyword and the dtype mapping must be same.") + + id_set = frozenset([id for id in arg_id_to_descr]) + assert len(arg_id_to_descr) == max(id_set) - min(id_set) + 1, ("Either" + "repeated id encountered in arg_id_dtype or missed the dtype of some" + "of the keyword") # }}} self.name = name - self.arg_dtypes = arg_dtypes - self.result_dtypes = result_dtypes + self.arg_id_to_descr = arg_id_to_descr + self.arg_id_to_keyword = arg_id_to_keyword + + # creating a inverse mapping which would be used a lot in the implementation + self.keyword_to_arg_id = {} + for arg_id, keyword in arg_id_to_keyword.items(): + self.keyword_to_arg_id[keyword] = arg_id - super(InKernelCallable).__init__(name=name) + super(InKernelCallable).__init__(name=name, + arg_id_to_descr=arg_id_to_descr, + arg_id_to_keyword=arg_id_to_keyword) def copy(self, name=None): if name is None: @@ -74,7 +129,7 @@ class InKernelCallable(ImmutableRecord): return InKernelCallable(name=name) - def with_types(self, arg_id_to_type): + def with_types(self, arg_id_to_descr): """ :arg arg_id_to_type: a mapping from argument identifiers (integers for positional arguments, names for keyword @@ -91,6 +146,13 @@ class InKernelCallable(ImmutableRecord): Any argument information exists both by its positional and its keyword identifier. """ + # An example for this would be interpreting the function call. + # The example func(1+j, 1) should be converted to func(1+j, 1+0j) + # Now what I don;t understand is why would the new_self. + # So this is a general function which says implement so and so function + # with so and so dtypes. + # So something like double sin(double x) is the function, then with + # types would be a transformation which would raise NotImplementedError() @@ -135,14 +197,18 @@ class InKernelCallable(ImmutableRecord): """ :arg arg_id: (keyword) name or position """ - pass + + raise NotImplementedError() def is_ready_for_code_gen(self): + return False # {{{ code generation def generate_preambles(self, target): + """ This would generate the target specific preamble. + """ pass def emit_call(self, target): @@ -151,49 +217,12 @@ class InKernelCallable(ImmutableRecord): # }}} def __eq__(self, other): - pass + return (self.name == other.name + and self.arg_id_to_descr == other.arg_id_to_descr + and self.arg_id_to_keyword == other.arg_id_to_keyword) def __hash__(self, other): - pass - - -class CallableMath(InKernelCallable): - """ A class to contain all the standard math library functions like sin, cos, - abs, etu - """ - - def __init__(self, name, result_dtype, arg_dtypes): - super(CallableMath).__init__(self, - name=name, - result_dtypes=(result_dtype,), - arg_dtypes=arg_dtypes) - - def is_arg_written(self, arg_id): - """ - :arg arg_id: (keyword) name or position - """ - if arg_id < 0: - return True - else: - return False - - # {{{ codegen - - def is_ready_for_code_gen(self): - return True - - def generate_preambles(self, target): - pass - - def emit_call(self, target): - pass - - # }}} - - -class CallableReduction(InKernelCallable): - """ Contains information about the functions which are encountered as reductions. - """ + return hash((self.name, self.arg_id_to_descr, self.arg_id_to_keyword)) class CallableKernel(InKernelCallable): -- GitLab From b794b79747401e5644857097115bb9a7c872b116 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 5 Mar 2018 03:31:57 -0600 Subject: [PATCH 096/116] Added a definition for a typical pass by value function signature --- loopy/kernel/function_interface.py | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 2fa6f824d..9d1be67ba 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -2,6 +2,7 @@ from pytools import ImmutableRecord from loopy.diagnostic import LoopyError from collections.abc import Mapping +from loopy.types import LoopyType # {{{ argument descriptors @@ -56,6 +57,8 @@ class ArrayArgDescriptor(ArgDescriptor): # }}} +# {{{ in kernel callable + class InKernelCallable(ImmutableRecord): """ @@ -224,6 +227,70 @@ class InKernelCallable(ImmutableRecord): def __hash__(self, other): return hash((self.name, self.arg_id_to_descr, self.arg_id_to_keyword)) +# }}} + + +# {{{ pass by value functions + +class PassByValueFunction(InKernelCallable): + """ A class for expressing the c-math library calls. All the functions are + assumed of the following function signature: z = f(a, b, c, d, ...) depending on + the arity. + + .. attribute:: name + + The name of the function as would be encountered in loopy. + + .. attribute:: dtype + + A :class:`LoopyType` which would indicate the dtypes encountered throughout + the function signature + + .. attribute:: arity + + Integer defining the whether the operation is unary or binary + + """ + + def __init__(self, name=None, dtype=None, arity=None): + + # {{{ sanity checks + + assert isinstance(name, str) + assert isinstance(dtype, LoopyType) + assert isinstance(arity, int) + + # }}} + + # {{{ preparing arguments for the superclass + + arg_id_to_descr = {} + arg_id_to_keyword = {} + + for i in range(arity): + arg_id_to_descr[i] = ValueArgDescriptor(dtype=dtype) + arg_id_to_keyword[i] = chr(ord('a')+1) + + arg_id_to_descr[-1] = ValueArgDescriptor(dtype=dtype) + arg_id_to_keyword[-1] = 'z' + + # }}} + + super(PassByValueFunction).__init__(self, + name=name, + arg_id_to_descr=arg_id_to_descr, + arg_id_to_keyword=arg_id_to_keyword) + + def emit_call(self, target): + raise NotImplementedError() + + def get_preamble(self, target): + raise NotImplementedError() + +# }}} + + +# {{{ callable kernel class CallableKernel(InKernelCallable): """ @@ -241,4 +308,7 @@ class CallableKernel(InKernelCallable): return CallableKernel(name=name, subkernel=subkernel) +# }}} + + # vim: foldmethod=marker -- GitLab From 47ea63e2e1742cd782d6ae6963648d0e6166ccfe Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 5 Mar 2018 03:32:50 -0600 Subject: [PATCH 097/116] Removed CallMangleInfo for good --- loopy/kernel/data.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index a62ab0574..fd2b5c763 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -604,29 +604,4 @@ class SubstitutionRule(ImmutableRecord): # }}} -# {{{ function call mangling - -class CallMangleInfo(ImmutableRecord): - """ - .. attribute:: target_name - A string. The name of the function to be called in the - generated target code. - .. attribute:: result_dtypes - A tuple of :class:`LoopyType` instances indicating what - types of values the function returns. - .. attribute:: arg_dtypes - A tuple of :class:`LoopyType` instances indicating what - types of arguments the function actually receives. - """ - - def __init__(self, target_name, result_dtypes, arg_dtypes): - assert isinstance(result_dtypes, tuple) - - super(CallMangleInfo, self).__init__( - target_name=target_name, - result_dtypes=result_dtypes, - arg_dtypes=arg_dtypes) - -# }}} - # vim: foldmethod=marker -- GitLab From 912a6cc8a2bc6d52ba6a437b7d90f3dcb482977b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 5 Mar 2018 03:34:38 -0600 Subject: [PATCH 098/116] No more CallMangleInfo --- loopy/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 100c339ad..ddfbf1624 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -47,8 +47,7 @@ from loopy.kernel.data import ( KernelArgument, ValueArg, GlobalArg, ConstantArg, ImageArg, temp_var_scope, TemporaryVariable, - SubstitutionRule, - CallMangleInfo) + SubstitutionRule) from loopy.kernel import LoopKernel, kernel_state from loopy.kernel.tools import ( @@ -164,8 +163,7 @@ __all__ = [ "KernelArgument", "ValueArg", "GlobalArg", "ConstantArg", "ImageArg", "temp_var_scope", "TemporaryVariable", - "SubstitutionRule", - "CallMangleInfo", + "SubstitutionRule" "default_function_mangler", "single_arg_function_mangler", -- GitLab From bc4738a3b49acd4faf439a1514e4ff54acf04d9c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 6 Mar 2018 02:21:45 -0600 Subject: [PATCH 099/116] removed CallMangleInfo from c/__init__.py --- loopy/kernel/function_interface.py | 42 ++++++------------------------ loopy/target/c/__init__.py | 22 ++++++++-------- 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 9d1be67ba..595e96e9a 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -1,3 +1,5 @@ +import numpy as np + from pytools import ImmutableRecord from loopy.diagnostic import LoopyError @@ -241,45 +243,17 @@ class PassByValueFunction(InKernelCallable): The name of the function as would be encountered in loopy. - .. attribute:: dtype - - A :class:`LoopyType` which would indicate the dtypes encountered throughout - the function signature - - .. attribute:: arity - - Integer defining the whether the operation is unary or binary - """ - def __init__(self, name=None, dtype=None, arity=None): - - # {{{ sanity checks + def __init__(self, name=None, arg_dtypes=None, result_dtype=None): - assert isinstance(name, str) - assert isinstance(dtype, LoopyType) - assert isinstance(arity, int) + self.arg_dtypes = arg_dtypes - # }}} + if result_dtype is None: + result_dtype = np.find_common_type( + [], [dtype.numpy_dtype for dtype in arg_dtypes]) - # {{{ preparing arguments for the superclass - - arg_id_to_descr = {} - arg_id_to_keyword = {} - - for i in range(arity): - arg_id_to_descr[i] = ValueArgDescriptor(dtype=dtype) - arg_id_to_keyword[i] = chr(ord('a')+1) - - arg_id_to_descr[-1] = ValueArgDescriptor(dtype=dtype) - arg_id_to_keyword[-1] = 'z' - - # }}} - - super(PassByValueFunction).__init__(self, - name=name, - arg_id_to_descr=arg_id_to_descr, - arg_id_to_keyword=arg_id_to_keyword) + self.result_dtype = result_dtype def emit_call(self, target): raise NotImplementedError() diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e944b4e9f..d07cf5d85 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -27,7 +27,7 @@ THE SOFTWARE. import six import numpy as np # noqa -from loopy.kernel.data import CallMangleInfo +from loopy.kernel.function_interface import PassByValueFunction, InKernelCallable from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder from loopy.diagnostic import LoopyError, LoopyTypeError from cgen import Pointer, NestedDeclarator, Block @@ -386,10 +386,10 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): else: raise LoopyTypeError("%s does not support type %s" % (name, dtype)) - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) + return PassByValueFunction( + name=name, + arg_dtypes=arg_dtypes, + result_dtype=arg_dtypes[0]) # binary functions if (name in ["fmax", "fmin"] @@ -414,10 +414,10 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): % (name, dtype)) result_dtype = NumpyType(dtype) - return CallMangleInfo( - target_name=name, - result_dtypes=(result_dtype,), - arg_dtypes=2*(result_dtype,)) + return PassByValueFunction( + name=name, + arg_dtypes=2*(result_dtype,), + result_dtypes=result_dtype) return None @@ -986,8 +986,8 @@ class CASTBuilder(ASTBuilderBase): # FIXME: This has to be interpreted from the kernel # Assumption: That the compatibilty checks would be done by # `register_knl` - mangle_result = CallMangleInfo( - target_name=func_id, + mangle_result = InKernelCallable( + name=1/0, result_dtypes=par_dtypes, arg_dtypes=par_dtypes) -- GitLab From 80ac284ee06e8ba7a30b362a87ed414325a4597d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 6 Mar 2018 03:04:50 -0600 Subject: [PATCH 100/116] Completely replaced CallMangleInfo with PassByValueFunction --- loopy/kernel/__init__.py | 8 +++--- loopy/kernel/function_interface.py | 4 ++- loopy/target/c/codegen/expression.py | 7 +++--- loopy/target/opencl.py | 37 ++++++++++++++-------------- loopy/target/pyopencl.py | 21 ++++++++-------- loopy/type_inference.py | 8 ++++-- 6 files changed, 46 insertions(+), 39 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 01580caed..ae49d30ee 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -314,8 +314,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): for mangler in manglers: mangle_result = mangler(self, identifier, arg_dtypes) if mangle_result is not None: - from loopy.kernel.data import CallMangleInfo - if isinstance(mangle_result, CallMangleInfo): + from loopy.kernel.function_interface import InKernelCallable + if isinstance(mangle_result, InKernelCallable): assert len(mangle_result.arg_dtypes) == len(arg_dtypes) return mangle_result @@ -325,7 +325,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): warn("'%s' returned a tuple instead of a CallMangleInfo instance. " "This is deprecated." % mangler.__name__, DeprecationWarning) - + ''' if len(mangle_result) == 2: result_dtype, target_name = mangle_result return CallMangleInfo( @@ -339,10 +339,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): target_name=target_name, result_dtypes=(result_dtype,), arg_dtypes=actual_arg_dtypes) - else: raise ValueError("unexpected size of tuple returned by '%s'" % mangler.__name__) + ''' return None diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 595e96e9a..4335c626d 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -247,6 +247,7 @@ class PassByValueFunction(InKernelCallable): def __init__(self, name=None, arg_dtypes=None, result_dtype=None): + self.name = name self.arg_dtypes = arg_dtypes if result_dtype is None: @@ -256,7 +257,8 @@ class PassByValueFunction(InKernelCallable): self.result_dtype = result_dtype def emit_call(self, target): - raise NotImplementedError() + raise NotImplementedError("PassByValueFUnction is always encountered" + "through an assignment") def get_preamble(self, target): raise NotImplementedError() diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 3bb1e86b9..b35b9b26d 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -40,6 +40,7 @@ from pymbolic import var from loopy.expression import dtype_to_type_context from loopy.type_inference import TypeInferenceMapper +from loopy.kernel.function_interface import PassByValueFunction from loopy.diagnostic import LoopyError, LoopyWarning from loopy.tools import is_integer @@ -448,7 +449,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): "maybe you need to register a function mangler?" % identifier) - if len(mangle_result.result_dtypes) != 1: + if not isinstance(mangle_result, PassByValueFunction): raise LoopyError("functions with more or fewer than one return value " "may not be used in an expression") @@ -478,10 +479,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): from loopy.codegen import SeenFunction self.codegen_state.seen_functions.add( SeenFunction(identifier, - mangle_result.target_name, + mangle_result.name, mangle_result.arg_dtypes or par_dtypes)) - return var(mangle_result.target_name)(*processed_parameters) + return var(mangle_result.name)(*processed_parameters) # {{{ deal with complex-valued variables diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 9767aeaf4..bb1b59c8b 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -32,7 +32,8 @@ from pytools import memoize_method from loopy.diagnostic import LoopyError from loopy.types import NumpyType from loopy.target.c import DTypeRegistryWrapper, c_math_mangler -from loopy.kernel.data import temp_var_scope, CallMangleInfo +from loopy.kernel.data import temp_var_scope +from loopy.kernel.function_interface import PassByValueFunction from pymbolic import var from functools import partial @@ -176,17 +177,17 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if dtype.kind == "i": result_dtype = NumpyType(dtype) - return CallMangleInfo( - target_name=name, - result_dtypes=(result_dtype,), - arg_dtypes=2*(result_dtype,)) + return PassByValueFunction( + name=name, + arg_dtypes=2*(result_dtype,), + result_dtypes=result_dtype) if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] - return CallMangleInfo( - target_name=name, - result_dtypes=(NumpyType(scalar_dtype),), - arg_dtypes=(arg_dtypes[0],)*2) + return PassByValueFunction( + name=name, + arg_dtypes=(arg_dtypes[0],)*2, + result_dtype=NumpyType(scalar_dtype)) if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS: num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name] @@ -202,10 +203,10 @@ def opencl_function_mangler(kernel, name, arg_dtypes): % name) result_dtype = NumpyType(dtype) - return CallMangleInfo( - target_name=name, - result_dtypes=(result_dtype,), - arg_dtypes=(result_dtype,)*num_args) + return PassByValueFunction( + name=name, + arg_dtypes=(result_dtype,)*num_args, + result_dtype=result_dtype) if name in VECTOR_LITERAL_FUNCS: base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name] @@ -213,11 +214,11 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if count != len(arg_dtypes): return None - return CallMangleInfo( - target_name="(%s%d) " % (base_tp_name, count), - result_dtypes=(kernel.target.vector_dtype( - NumpyType(dtype), count),), - arg_dtypes=(NumpyType(dtype),)*count) + return PassByValueFunction( + name="(%s%d) " % (base_tp_name, count), + arg_dtypes=(NumpyType(dtype),)*count, + result_dtypes=kernel.target.vector_dtype( + NumpyType(dtype), count)) return None diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 744c03d8e..504fb30ca 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -31,7 +31,7 @@ from six.moves import range import numpy as np -from loopy.kernel.data import CallMangleInfo +from loopy.kernel.function_interface import PassByValueFunction from loopy.target.opencl import OpenCLTarget, OpenCLCASTBuilder from loopy.target.python import PythonASTBuilderBase from loopy.types import NumpyType @@ -215,18 +215,17 @@ def pyopencl_function_mangler(target, name, arg_dtypes): "sin", "cos", "tan", "sinh", "cosh", "tanh", "conj"]: - return CallMangleInfo( - target_name="%s_%s" % (tpname, name), - result_dtypes=(arg_dtype,), - arg_dtypes=(arg_dtype,)) + return PassByValueFunction( + name="%s_%s" % (tpname, name), + arg_dtypes=(arg_dtype,), + result_dtype=arg_dtype) if name in ["real", "imag", "abs"]: - return CallMangleInfo( - target_name="%s_%s" % (tpname, name), - result_dtypes=(NumpyType( - np.dtype(arg_dtype.numpy_dtype.type(0).real)), - ), - arg_dtypes=(arg_dtype,)) + return PassByValueFunction( + name="%s_%s" % (tpname, name), + arg_dtypes=(arg_dtype,), + result_dtypes=NumpyType( + np.dtype(arg_dtype.numpy_dtype.type(0).real))) return None diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 32fefed7b..6c8cf8466 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -34,7 +34,11 @@ from loopy.diagnostic import ( LoopyError, TypeInferenceFailure, DependencyTypeInferenceFailure) +from loopy.kernel.function_interface import PassByValueFunction + import logging + + logger = logging.getLogger(__name__) @@ -276,11 +280,11 @@ class TypeInferenceMapper(CombineMapper): return [mangle_result.result_dtypes] else: if mangle_result is not None: - if len(mangle_result.result_dtypes) != 1 and not return_tuple: + if not isinstance(mangle_result, PassByValueFunction): raise LoopyError("functions with more or fewer than one " "return value may only be used in direct assignments") - return [mangle_result.result_dtypes[0]] + return [mangle_result.result_dtype] raise RuntimeError("unable to resolve " "function '%s' with %d given arguments" -- GitLab From f8883406a8c5f90bae05d82f00ef2f43cc85a66b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 00:21:29 -0600 Subject: [PATCH 101/116] Removed the ArrayCallInstruction --- loopy/kernel/instruction.py | 173 ++++-------------------------------- 1 file changed, 15 insertions(+), 158 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 8b79fbe27..2c877e360 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1027,158 +1027,27 @@ class CallInstruction(MultiAssignmentBase): result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates) return result - @property - def atomicity(self): - # Function calls can impossibly be atomic, and even the result assignment - # is troublesome, especially in the case of multiple results. Avoid the - # issue altogether by disallowing atomicity. - return () - -# }}} - - -# {{{ instruction: function call over an array - -class ArrayCallInstruction(MultiAssignmentBase): - """An instruction capturing a function call on array. Unlike - :class:`ArrayCallInstruction`, this performs operations on arrays and is - eventually supposed to have invocations from "library-functions" - and this instruction supports functions with multiple return values. - - .. attribute:: assignees - - A :class:`tuple` of left-hand sides for the assignment - - .. attribute:: expression - - The expression that dictates the RHS - - The following attributes are only used until - :func:`loopy.make_kernel` is finished: - - .. attribute:: temp_var_types - - if not *None*, a type that will be assigned to the new temporary variable - created from the assignee - - .. automethod:: __init__ - """ - - fields = MultiAssignmentBase.fields | \ - set("assignees temp_var_types".split()) - pymbolic_fields = MultiAssignmentBase.pymbolic_fields | set(["assignees"]) - - def __init__(self, - assignees, expression, - id=None, - depends_on=None, - depends_on_is_final=None, - groups=None, - conflicts_with_groups=None, - no_sync_with=None, - within_inames_is_final=None, - within_inames=None, - boostable=None, boostable_into=None, tags=None, - temp_var_types=None, - priority=0, predicates=frozenset(), - insn_deps=None, insn_deps_is_final=None, - forced_iname_deps=None, - forced_iname_deps_is_final=None): - - super(ArrayCallInstruction, self).__init__( - id=id, - depends_on=depends_on, - depends_on_is_final=depends_on_is_final, - groups=groups, - conflicts_with_groups=conflicts_with_groups, - no_sync_with=no_sync_with, - within_inames_is_final=within_inames_is_final, - within_inames=within_inames, - boostable=boostable, - boostable_into=boostable_into, - priority=priority, - predicates=predicates, - tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final, - forced_iname_deps=forced_iname_deps, - forced_iname_deps_is_final=forced_iname_deps_is_final) - - from loopy.symbolic import ArrayCall - if not isinstance(expression, ArrayCall) and expression is not None: - raise LoopyError("'expression' argument to CallInstruction " - "must be a ArrayCall") - - from loopy.symbolic import parse - if isinstance(assignees, str): - assignees = parse(assignees) - if not isinstance(assignees, tuple): - raise LoopyError("'assignees' argument to CallInstruction " - "must be a tuple or a string parseable to a tuple" - "--got '%s'" % type(assignees).__name__) - - from pymbolic.primitives import Variable, Subscript - from loopy.symbolic import LinearSubscript, SubArrayRef - for assignee in assignees: - if not isinstance(assignee, (Variable, Subscript, LinearSubscript, - SubArrayRef)): - raise LoopyError("invalid lvalue '%s'" % assignee) - - self.assignees = assignees - self.expression = expression - - # FIXME: Currently assumes that all the assignees share the same inames - # FIXME: Change this change this CHANGE THIS... - # This should be the union of all such happenings - self.within_inames = frozenset(set(self.assignee_subscript_deps()[0])) - - if temp_var_types is None: - self.temp_var_types = (None,) * len(self.assignees) - else: - self.temp_var_types = temp_var_types - - # {{{ implement InstructionBase interface - - # FIXME: Didn't look into this. Maybe need to replace this - @memoize_method - def assignee_var_names(self): - return tuple(_get_assignee_var_name(a) for a in self.assignees) - - def assignee_subscript_deps(self): - return tuple( - _get_assignee_subscript_deps(a) - for a in self.assignees) - - def with_transformed_expressions(self, f, *args): - return self.copy( - assignees=f(self.assignees, *args), - expression=f(self.expression, *args), - predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + def get_parameters_dim_tag_dict(self, arg_dict): - # }}} + from loopy.symbolic import SubArrayRef - def get_parameters_dim_tag_dict(self, arg_dict): dim_tags_dict = {} for par in (self.assignees + self.expression.parameters): - arg_name = par.subscript.aggregate.name - dim_tags_dict[arg_name] = par.get_inner_dim_tags( - arg_dict[arg_name].dim_tags) + if isinstance(par, SubArrayRef): + arg_name = par.subscript.aggregate.name + dim_tags_dict[arg_name] = par.get_inner_dim_tags( + arg_dict[arg_name].dim_tags) return dim_tags_dict - def __str__(self): - result = "%s: %s <- %s" % (self.id, - ", ".join(str(a) for a in self.assignees), - self.expression) + @property() + def is_array_call(self): + from loopy.symbolic import SubArrayRef + for arg in self.assignees + self.expression.parameters: + if isinstance(arg, SubArrayRef): + return True - options = self.get_str_options() - if options: - result += " {%s}" % (": ".join(options)) - - if self.predicates: - result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates) - return result + return False @property def atomicity(self): @@ -1198,8 +1067,8 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): "left-hand side not supported") from pymbolic.primitives import Call - from loopy.symbolic import Reduction - if not isinstance(expression, (Call, Reduction)): + from loopy.symbolic import Reduction, ArrayCall + if not isinstance(expression, (ArrayCall, Call, Reduction)): raise LoopyError("right-hand side in multiple assignment must be " "function call or reduction, got: '%s'" % expression) return CallInstruction( @@ -1208,18 +1077,6 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): temp_var_types=temp_var_types, **kwargs) - from loopy.symbolic import ArrayCall - if isinstance(expression, ArrayCall): - atomicity = kwargs.pop("atomicity", ()) - if atomicity: - raise LoopyError("atomic operations with more than one " - "left-hand side not supported") - return ArrayCallInstruction( - assignees=assignees, - expression=expression, - temp_var_types=temp_var_types, - **kwargs) - return Assignment( assignee=assignees[0], expression=expression, -- GitLab From f919fc2fa91ef9218f488b2147ece9c94d6fbaf6 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 00:24:34 -0600 Subject: [PATCH 102/116] Removed ArrayCallInstruction --- loopy/preprocess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index c5fc25d92..ad119e94e 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -749,9 +749,9 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): # }}} - from loopy.kernel.instruction import CallInstruction, ArrayCallInstruction + from loopy.kernel.instruction import CallInstruction for insn in kernel.instructions: - if not isinstance(insn, (CallInstruction, ArrayCallInstruction)): + if not isinstance(insn, CallInstruction): continue if len(insn.assignees) <= 1: -- GitLab From 17c7dfe9423b39510f7cadfd25f40ead10015102 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 00:25:40 -0600 Subject: [PATCH 103/116] Removed the ArrayCallInstruction --- loopy/check.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 691783e86..7e661b566 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -131,12 +131,10 @@ def check_for_double_use_of_hw_axes(kernel): def check_for_inactive_iname_access(kernel): - from loopy.kernel.instruction import ArrayCallInstruction for insn in kernel.instructions: expression_inames = insn.read_dependency_names() & kernel.all_inames() - if not expression_inames <= kernel.insn_inames(insn) and ( - not isinstance(insn, ArrayCallInstruction)): + if not expression_inames <= kernel.insn_inames(insn): raise LoopyError( "instruction '%s' references " "inames '%s' that the instruction does not depend on" @@ -172,12 +170,9 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): from loopy.kernel.data import ConcurrentTag - from loopy.kernel.instruction import ArrayCallInstruction iname_to_tag = kernel.iname_to_tag.get for insn in kernel.instructions: - if isinstance(insn, ArrayCallInstruction): - continue for assignee_name, assignee_indices in zip( insn.assignee_var_names(), insn.assignee_subscript_deps()): -- GitLab From 645e05ca7be2c78a22f519ec11903a0f5407644e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 00:28:49 -0600 Subject: [PATCH 104/116] Removed ArrayCallInstruction --- loopy/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index ddfbf1624..9f3f99903 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -40,8 +40,7 @@ from loopy.kernel.instruction import ( memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, InstructionBase, MultiAssignmentBase, Assignment, ExpressionInstruction, - CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction, - ArrayCallInstruction) + CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction) from loopy.kernel.data import ( auto, KernelArgument, @@ -158,7 +157,7 @@ __all__ = [ "InstructionBase", "MultiAssignmentBase", "Assignment", "ExpressionInstruction", "CallInstruction", "CInstruction", "NoOpInstruction", - "BarrierInstruction", "ArrayCallInstruction", + "BarrierInstruction", "KernelArgument", "ValueArg", "GlobalArg", "ConstantArg", "ImageArg", -- GitLab From ff6ec681a6714b98c5e89c63e9de38a852da378d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 00:34:22 -0600 Subject: [PATCH 105/116] Cleaning up the mess of ArrayCallInstruction --- loopy/codegen/__init__.py | 5 +- loopy/codegen/auxiliary_kernels.py | 308 +++++++++++++++++++++++++++++ loopy/codegen/instruction.py | 28 +-- 3 files changed, 311 insertions(+), 30 deletions(-) create mode 100644 loopy/codegen/auxiliary_kernels.py diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 108b70a33..1981be483 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -35,8 +35,7 @@ from loopy.version import DATA_MODEL_VERSION from cgen import Collection from loopy.kernel.instruction import ( - ArrayCallInstruction, Assignment, - NoOpInstruction, BarrierInstruction, CallInstruction, + Assignment, NoOpInstruction, BarrierInstruction, CallInstruction, _DataObliviousInstruction) import logging @@ -502,7 +501,7 @@ def generate_code_v2(kernel): get_instruction_specific_kernel, generate_auxillary_kernel_device_code) for insn in kernel.instructions: - if isinstance(insn, ArrayCallInstruction): + if isinstance(insn, CallInstruction) and insn.is_array_call: if insn.expression.function.name == func: compliant_knl = get_instruction_specific_kernel( insn, kernel, aux_knl) diff --git a/loopy/codegen/auxiliary_kernels.py b/loopy/codegen/auxiliary_kernels.py new file mode 100644 index 000000000..75a886c70 --- /dev/null +++ b/loopy/codegen/auxiliary_kernels.py @@ -0,0 +1,308 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Kaushik Kulkarni" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +import islpy as isl + +from loopy.codegen import ( + ImplementedDataInfo, + PreambleInfo, + CodeGenerationState) +from loopy.diagnostic import LoopyError +from loopy.kernel.instruction import ( + Assignment, NoOpInstruction, BarrierInstruction, CallInstruction, + _DataObliviousInstruction) +from cgen import Collection + +import logging +logger = logging.getLogger(__name__) + + +__doc__ = """ +.. currentmodule:: loopy + +.. autofunction:: generate_auxillary_kernel_device_code + +.. autofunction:: get_instruction_specific_kernel +""" + +# {{{ generating compliant kernel for the given instruction + + +def check_compliance(insn, parent_knl, child_knl, child_to_parent): + + # {{{ getting the parent to child mapping + + parent_to_child = {} + for child, parent in child_to_parent.items(): + parent_to_child[parent] = child + + # }}} + + # {{{ dtype compliance + for arg in child_knl.args: + name_in_parent = child_to_parent[arg.name] + parent_arg = parent_knl.arg_dict[name_in_parent] + if arg.dtype is not None: + assert arg.dtype == parent_arg.dtype, ("While registering kernel the" + "dtypes of variables don't match") + # }}} + + """ + # Disabling for now, till I have a function for finding the swept region + # {{{ axes used by the swept_inames + + parent_parameters = insn.expression.parameters + parent_assignees = insn.exression.assignees + for par in parent_parameters + parent_assignees: + inames = par.swept_inames + child_arg = child_knl.arg_dict[parent_to_child[par.name]] + + # check to ensure the equality of number of axes around both the + # kernels + assert len(child_arg.shape) == len(inames), ("regsiter_knl: The ") + + parent_swept_region = par.swept_region() + child_swept_region = child_arg.shape + + for parent_swept, child_swept in zip(parent_swept_region, + child_swept_region): + assert parent_swept == child_swept, ("regsiter_kernel: send only the" + "part of the array you intend to write to the child kernel") + """ + + # }}} + + +def get_instruction_specific_kernel(insn, parent_knl, child_knl): + """ Generates the kernel with the arguments strided so that it is compliant + with the given instruction. Returns the new compliant kernel. + """ + child_knl = child_knl.copy( + name=insn.expression.function.name + "_" + insn.id) + dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) + + # {{{ creating the parent to child parameter association dictionary + + child_arg_to_parent = {} + for child_par, parent_par in zip(child_knl.args, + insn.expression.parameters + insn.assignees): + child_arg_to_parent[child_par.name] = ( + parent_par.subscript.aggregate.name) + + # }}} + + check_compliance(insn, parent_knl, child_knl, child_arg_to_parent) + + new_args = [] + for arg in child_knl.args: + name_in_parent = child_arg_to_parent[arg.name] + parent_arg = parent_knl.arg_dict[name_in_parent] + child_dim_tag = dim_tags_dict[name_in_parent] + + new_args.append(arg.copy(dim_tags=child_dim_tag, dtype=parent_arg.dtype)) + + child_knl = child_knl.copy(args=new_args) + + return child_knl + +# }}} + +# {{{ code generation for the auxillary kernel + + +def generate_auxillary_kernel_device_code(kernel, target): + """ + Generates device programs for the given auxillary kernel, with the target + specified by the parent kernel + :returns: a :class:`CodeGenerationResult` + """ + kernel = kernel.copy(target=target) + + from loopy.kernel import kernel_state + if kernel.state == kernel_state.INITIAL: + from loopy.preprocess import preprocess_kernel + kernel = preprocess_kernel(kernel) + + if kernel.schedule is None: + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) + + if kernel.state != kernel_state.SCHEDULED: + raise LoopyError( + "cannot generate code for a kernel that has not been " + "scheduled") + + from loopy.type_inference import infer_unknown_types + kernel = infer_unknown_types(kernel, expect_completion=True) + + from loopy.check import pre_codegen_checks + pre_codegen_checks(kernel) + + logger.info("%s: generate Auxillary Kernel code: start" % kernel.name) + + # {{{ examine arg list + + from loopy.kernel.data import ValueArg + from loopy.kernel.array import ArrayBase + + implemented_data_info = [] + + for arg in kernel.args: + is_written = arg.name in kernel.get_written_variables() + if isinstance(arg, ArrayBase): + implemented_data_info.extend( + arg.decl_info( + kernel.target, + is_written=is_written, + index_dtype=kernel.index_dtype)) + + elif isinstance(arg, ValueArg): + implemented_data_info.append(ImplementedDataInfo( + target=kernel.target, + name=arg.name, + dtype=arg.dtype, + arg_class=ValueArg, + is_written=is_written)) + + else: + raise ValueError("argument type not understood: '%s'" % type(arg)) + + allow_complex = False + for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): + if var.dtype.involves_complex(): + allow_complex = True + + # }}} + + seen_dtypes = set() + seen_functions = set() + seen_atomic_dtypes = set() + + initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions) + codegen_state = CodeGenerationState( + kernel=kernel, + implemented_data_info=implemented_data_info, + implemented_domain=initial_implemented_domain, + implemented_predicates=frozenset(), + seen_dtypes=seen_dtypes, + seen_functions=seen_functions, + seen_atomic_dtypes=seen_atomic_dtypes, + var_subst_map={}, + allow_complex=allow_complex, + var_name_generator=kernel.get_var_name_generator(), + is_generating_device_code=False, + gen_program_name=kernel.name, + schedule_index_end=len(kernel.schedule), + is_generating_master_kernel=False) + + from loopy.codegen.result import generate_host_or_device_program + + # {{{ handling auxillary kernels + + auxiliary_functions = [] + + for func, aux_knl in kernel.auxiliary_kernels.items(): + from loopy.codegen.auxillary_kernels import ( + get_instruction_specific_kernel, + generate_auxillary_kernel_device_code) + for insn in kernel.instructions: + if isinstance(insn, CallInstruction): + if insn.expression.function.name == func: + compliant_knl = get_instruction_specific_kernel( + insn, kernel, aux_knl) + # TODO: Also need to take input such as allow_complex, + # and preambles from the aux kernels + aux_func = generate_auxillary_kernel_device_code( + compliant_knl, + kernel.target).device_programs[0].ast # noqa + auxiliary_functions.append(aux_func) + elif isinstance(insn, (Assignment, NoOpInstruction, Assignment, + BarrierInstruction, CallInstruction, + _DataObliviousInstruction)): + pass + else: + raise NotImplementedError( + "register_knl not made for %s type of" + "instruciton" % (str(type(insn)))) + # }}} + codegen_result = generate_host_or_device_program( + codegen_state, + schedule_index=0) + + device_code_str = codegen_result.device_code() + + from loopy.check import check_implemented_domains + assert check_implemented_domains( + kernel, codegen_result.implemented_domains, device_code_str) + + # {{{ handle preambles + + for arg in kernel.args: + seen_dtypes.add(arg.dtype) + for tv in six.itervalues(kernel.temporary_variables): + seen_dtypes.add(tv.dtype) + + preambles = kernel.preambles[:] + + preamble_info = PreambleInfo( + kernel=kernel, + seen_dtypes=seen_dtypes, + seen_functions=seen_functions, + # a set of LoopyTypes (!) + seen_atomic_dtypes=seen_atomic_dtypes, + codegen_state=codegen_state + ) + + preamble_generators = kernel.preamble_generators + for prea_gen in preamble_generators: + preambles.extend(prea_gen(preamble_info)) + + codegen_result = codegen_result.copy(device_preambles=preambles) + + # }}} + + # {{{ Pasting the auxillary functions code to the first device program + # TODO: Currently Sticks all the functions only in the first dev_prog, + # need to identify which function goes with which kernel + new_dev_prog = codegen_result.device_programs[0] + for func in auxillary_functions: + new_dev_prog = new_dev_prog.copy( + ast=Collection([func, new_dev_prog.ast])) + new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] + codegen_result = codegen_result.copy(device_programs=new_device_programs) + # }}} + # For faster unpickling in the common case when implemented_domains isn't needed. + from loopy.tools import LazilyUnpicklingDict + codegen_result = codegen_result.copy( + implemented_domains=LazilyUnpicklingDict( + codegen_result.implemented_domains)) + + logger.info("%s: generate code: done" % kernel.name) + + return codegen_result + +# }}} + +# vim: foldmethod=marker diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index c027d9579..e2a060e07 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -76,14 +76,12 @@ def generate_instruction_code(codegen_state, insn): kernel = codegen_state.kernel from loopy.kernel.instruction import (Assignment, CallInstruction, - CInstruction, ArrayCallInstruction) + CInstruction) if isinstance(insn, Assignment): ast = generate_assignment_instruction_code(codegen_state, insn) elif isinstance(insn, CallInstruction): ast = generate_call_code(codegen_state, insn) - elif isinstance(insn, ArrayCallInstruction): - ast = generate_array_call_code(codegen_state, insn) elif isinstance(insn, CInstruction): ast = generate_c_instruction_code(codegen_state, insn) else: @@ -246,30 +244,6 @@ def generate_call_code(codegen_state, insn): return result -def generate_array_call_code(codegen_state, insn): - kernel = codegen_state.kernel - - # {{{ vectorization handling - - if codegen_state.vectorization_info: - if insn.atomicity: - raise Unvectorizable("atomic operation") - - # }}} - - result = codegen_state.ast_builder.emit_array_call( - codegen_state, insn) - - # {{{ tracing - - if kernel.options.trace_assignments or kernel.options.trace_assignment_values: - raise NotImplementedError("tracing of multi-output function calls") - - # }}} - - return result - - def generate_c_instruction_code(codegen_state, insn): kernel = codegen_state.kernel -- GitLab From 20801b479579f24ce85d0be29a25103090fee68f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 00:45:32 -0600 Subject: [PATCH 106/116] Generalized CallInstructin --- loopy/type_inference.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 6c8cf8466..533d623fd 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -454,24 +454,6 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): assert found if result_i is not None: result.append(result_i) - elif isinstance(writer_insn, lp.ArrayCallInstruction): - return_dtype_set = type_inf_mapper(expr, return_tuple=True, - return_dtype_set=True) - - result = [] - for return_dtype_set in return_dtype_set: - result_i = None - found = False - for assignee, comp_dtype_set in zip( - writer_insn.assignee_var_names(), return_dtype_set): - if assignee == var_name: - found = True - result_i = comp_dtype_set - break - - assert found - if result_i is not None: - result.append(result_i) debug(" result: %s", result) -- GitLab From deba44364f9012cebb6284b8d39adc1904ba7d96 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 02:09:07 -0600 Subject: [PATCH 107/116] Completely removed ArrayCallInstruction --- loopy/codegen/__init__.py | 20 +- loopy/codegen/auxiliary_kernels.py | 20 +- loopy/codegen/auxillary_kernels.py | 309 ----------------------------- loopy/codegen/instruction.py | 2 +- loopy/kernel/__init__.py | 6 +- loopy/kernel/function_interface.py | 120 +++++++++-- loopy/kernel/instruction.py | 11 +- loopy/symbolic.py | 34 ++-- loopy/target/__init__.py | 2 +- loopy/target/c/__init__.py | 41 ++-- loopy/transform/register_knl.py | 8 +- 11 files changed, 189 insertions(+), 384 deletions(-) delete mode 100644 loopy/codegen/auxillary_kernels.py diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 1981be483..9e6db010d 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -197,7 +197,7 @@ class CodeGenerationState(object): .. attribute:: is_generating_master_kernel True of False indication if the code generation is happening for a - master kernel or auxillary kernels respectively. + master kernel or auxiliary kernels respectively. """ def __init__(self, kernel, @@ -492,14 +492,14 @@ def generate_code_v2(kernel): is_generating_master_kernel=True) from loopy.codegen.result import generate_host_or_device_program - # {{{ handling auxillary kernels + # {{{ handling auxiliary kernels - auxillary_functions = [] + auxiliary_functions = [] - for func, aux_knl in kernel.auxillary_kernels.items(): - from loopy.codegen.auxillary_kernels import ( + for func, aux_knl in kernel.auxiliary_kernels.items(): + from loopy.codegen.auxiliary_kernels import ( get_instruction_specific_kernel, - generate_auxillary_kernel_device_code) + generate_auxiliary_kernel_device_code) for insn in kernel.instructions: if isinstance(insn, CallInstruction) and insn.is_array_call: if insn.expression.function.name == func: @@ -507,9 +507,9 @@ def generate_code_v2(kernel): insn, kernel, aux_knl) # TODO: Also need to take input such as allow_complex, # and preambles from the aux kernels - aux_func = generate_auxillary_kernel_device_code(compliant_knl, + aux_func = generate_auxiliary_kernel_device_code(compliant_knl, kernel.target).device_programs[0].ast - auxillary_functions.append(aux_func) + auxiliary_functions.append(aux_func) elif isinstance(insn, (Assignment, NoOpInstruction, Assignment, BarrierInstruction, CallInstruction, _DataObliviousInstruction)): @@ -555,11 +555,11 @@ def generate_code_v2(kernel): # }}} - # {{{ Pasting the auxillary functions code to the first device program + # {{{ Pasting the auxiliary functions code to the first device program # TODO: Currently Sticks all the functions only in the first dev_prog, # need to identify which function goes with which kernel new_dev_prog = codegen_result.device_programs[0] - for func in auxillary_functions: + for func in auxiliary_functions: new_dev_prog = new_dev_prog.copy( ast=Collection([func, new_dev_prog.ast])) new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] diff --git a/loopy/codegen/auxiliary_kernels.py b/loopy/codegen/auxiliary_kernels.py index 75a886c70..b1ecbc3ff 100644 --- a/loopy/codegen/auxiliary_kernels.py +++ b/loopy/codegen/auxiliary_kernels.py @@ -42,7 +42,7 @@ logger = logging.getLogger(__name__) __doc__ = """ .. currentmodule:: loopy -.. autofunction:: generate_auxillary_kernel_device_code +.. autofunction:: generate_auxiliary_kernel_device_code .. autofunction:: get_instruction_specific_kernel """ @@ -129,12 +129,12 @@ def get_instruction_specific_kernel(insn, parent_knl, child_knl): # }}} -# {{{ code generation for the auxillary kernel +# {{{ code generation for the auxiliary kernel -def generate_auxillary_kernel_device_code(kernel, target): +def generate_auxiliary_kernel_device_code(kernel, target): """ - Generates device programs for the given auxillary kernel, with the target + Generates device programs for the given auxiliary kernel, with the target specified by the parent kernel :returns: a :class:`CodeGenerationResult` """ @@ -219,14 +219,14 @@ def generate_auxillary_kernel_device_code(kernel, target): from loopy.codegen.result import generate_host_or_device_program - # {{{ handling auxillary kernels + # {{{ handling auxiliary kernels auxiliary_functions = [] for func, aux_knl in kernel.auxiliary_kernels.items(): - from loopy.codegen.auxillary_kernels import ( + from loopy.codegen.auxiliary_kernels import ( get_instruction_specific_kernel, - generate_auxillary_kernel_device_code) + generate_auxiliary_kernel_device_code) for insn in kernel.instructions: if isinstance(insn, CallInstruction): if insn.expression.function.name == func: @@ -234,7 +234,7 @@ def generate_auxillary_kernel_device_code(kernel, target): insn, kernel, aux_knl) # TODO: Also need to take input such as allow_complex, # and preambles from the aux kernels - aux_func = generate_auxillary_kernel_device_code( + aux_func = generate_auxiliary_kernel_device_code( compliant_knl, kernel.target).device_programs[0].ast # noqa auxiliary_functions.append(aux_func) @@ -283,11 +283,11 @@ def generate_auxillary_kernel_device_code(kernel, target): # }}} - # {{{ Pasting the auxillary functions code to the first device program + # {{{ Pasting the auxiliary functions code to the first device program # TODO: Currently Sticks all the functions only in the first dev_prog, # need to identify which function goes with which kernel new_dev_prog = codegen_result.device_programs[0] - for func in auxillary_functions: + for func in auxiliary_functions: new_dev_prog = new_dev_prog.copy( ast=Collection([func, new_dev_prog.ast])) new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] diff --git a/loopy/codegen/auxillary_kernels.py b/loopy/codegen/auxillary_kernels.py deleted file mode 100644 index 3f7a0ff0f..000000000 --- a/loopy/codegen/auxillary_kernels.py +++ /dev/null @@ -1,309 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2018 Kaushik Kulkarni" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import six -import islpy as isl - -from loopy.codegen import ( - ImplementedDataInfo, - PreambleInfo, - CodeGenerationState) -from loopy.diagnostic import LoopyError -from loopy.kernel.instruction import ( - ArrayCallInstruction, Assignment, - NoOpInstruction, BarrierInstruction, CallInstruction, - _DataObliviousInstruction) -from cgen import Collection - -import logging -logger = logging.getLogger(__name__) - - -__doc__ = """ -.. currentmodule:: loopy - -.. autofunction:: generate_auxillary_kernel_device_code - -.. autofunction:: get_instruction_specific_kernel -""" - -# {{{ generating compliant kernel for the given instruction - - -def check_compliance(insn, parent_knl, child_knl, child_to_parent): - - # {{{ getting the parent to child mapping - - parent_to_child = {} - for child, parent in child_to_parent.items(): - parent_to_child[parent] = child - - # }}} - - # {{{ dtype compliance - for arg in child_knl.args: - name_in_parent = child_to_parent[arg.name] - parent_arg = parent_knl.arg_dict[name_in_parent] - if arg.dtype is not None: - assert arg.dtype == parent_arg.dtype, ("While registering kernel the" - "dtypes of variables don't match") - # }}} - - """ - # Disabling for now, till I have a function for finding the swept region - # {{{ axes used by the swept_inames - - parent_parameters = insn.expression.parameters - parent_assignees = insn.exression.assignees - for par in parent_parameters + parent_assignees: - inames = par.swept_inames - child_arg = child_knl.arg_dict[parent_to_child[par.name]] - - # check to ensure the equality of number of axes around both the - # kernels - assert len(child_arg.shape) == len(inames), ("regsiter_knl: The ") - - parent_swept_region = par.swept_region() - child_swept_region = child_arg.shape - - for parent_swept, child_swept in zip(parent_swept_region, - child_swept_region): - assert parent_swept == child_swept, ("regsiter_kernel: send only the" - "part of the array you intend to write to the child kernel") - """ - - # }}} - - -def get_instruction_specific_kernel(insn, parent_knl, child_knl): - """ Generates the kernel with the arguments strided so that it is compliant - with the given instruction. Returns the new compliant kernel. - """ - child_knl = child_knl.copy( - name=insn.expression.function.name + "_" + insn.id) - dim_tags_dict = insn.get_parameters_dim_tag_dict(parent_knl.arg_dict) - - # {{{ creating the parent to child parameter association dictionary - - child_arg_to_parent = {} - for child_par, parent_par in zip(child_knl.args, - insn.expression.parameters + insn.assignees): - child_arg_to_parent[child_par.name] = ( - parent_par.subscript.aggregate.name) - - # }}} - - check_compliance(insn, parent_knl, child_knl, child_arg_to_parent) - - new_args = [] - for arg in child_knl.args: - name_in_parent = child_arg_to_parent[arg.name] - parent_arg = parent_knl.arg_dict[name_in_parent] - child_dim_tag = dim_tags_dict[name_in_parent] - - new_args.append(arg.copy(dim_tags=child_dim_tag, dtype=parent_arg.dtype)) - - child_knl = child_knl.copy(args=new_args) - - return child_knl - -# }}} - -# {{{ code generation for the auxillary kernel - - -def generate_auxillary_kernel_device_code(kernel, target): - """ - Generates device programs for the given auxillary kernel, with the target - specified by the parent kernel - :returns: a :class:`CodeGenerationResult` - """ - kernel = kernel.copy(target=target) - - from loopy.kernel import kernel_state - if kernel.state == kernel_state.INITIAL: - from loopy.preprocess import preprocess_kernel - kernel = preprocess_kernel(kernel) - - if kernel.schedule is None: - from loopy.schedule import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) - - if kernel.state != kernel_state.SCHEDULED: - raise LoopyError( - "cannot generate code for a kernel that has not been " - "scheduled") - - from loopy.type_inference import infer_unknown_types - kernel = infer_unknown_types(kernel, expect_completion=True) - - from loopy.check import pre_codegen_checks - pre_codegen_checks(kernel) - - logger.info("%s: generate Auxillary Kernel code: start" % kernel.name) - - # {{{ examine arg list - - from loopy.kernel.data import ValueArg - from loopy.kernel.array import ArrayBase - - implemented_data_info = [] - - for arg in kernel.args: - is_written = arg.name in kernel.get_written_variables() - if isinstance(arg, ArrayBase): - implemented_data_info.extend( - arg.decl_info( - kernel.target, - is_written=is_written, - index_dtype=kernel.index_dtype)) - - elif isinstance(arg, ValueArg): - implemented_data_info.append(ImplementedDataInfo( - target=kernel.target, - name=arg.name, - dtype=arg.dtype, - arg_class=ValueArg, - is_written=is_written)) - - else: - raise ValueError("argument type not understood: '%s'" % type(arg)) - - allow_complex = False - for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): - if var.dtype.involves_complex(): - allow_complex = True - - # }}} - - seen_dtypes = set() - seen_functions = set() - seen_atomic_dtypes = set() - - initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions) - codegen_state = CodeGenerationState( - kernel=kernel, - implemented_data_info=implemented_data_info, - implemented_domain=initial_implemented_domain, - implemented_predicates=frozenset(), - seen_dtypes=seen_dtypes, - seen_functions=seen_functions, - seen_atomic_dtypes=seen_atomic_dtypes, - var_subst_map={}, - allow_complex=allow_complex, - var_name_generator=kernel.get_var_name_generator(), - is_generating_device_code=False, - gen_program_name=kernel.name, - schedule_index_end=len(kernel.schedule), - is_generating_master_kernel=False) - - from loopy.codegen.result import generate_host_or_device_program - - # {{{ handling auxillary kernels - - auxillary_functions = [] - - for func, aux_knl in kernel.auxillary_kernels.items(): - from loopy.codegen.auxillary_kernels import ( - get_instruction_specific_kernel, - generate_auxillary_kernel_device_code) - for insn in kernel.instructions: - if isinstance(insn, ArrayCallInstruction): - if insn.expression.function.name == func: - compliant_knl = get_instruction_specific_kernel( - insn, kernel, aux_knl) - # TODO: Also need to take input such as allow_complex, - # and preambles from the aux kernels - aux_func = generate_auxillary_kernel_device_code( - compliant_knl, - kernel.target).device_programs[0].ast # noqa - auxillary_functions.append(aux_func) - elif isinstance(insn, (Assignment, NoOpInstruction, Assignment, - BarrierInstruction, CallInstruction, - _DataObliviousInstruction)): - pass - else: - raise NotImplementedError( - "register_knl not made for %s type of" - "instruciton" % (str(type(insn)))) - # }}} - codegen_result = generate_host_or_device_program( - codegen_state, - schedule_index=0) - - device_code_str = codegen_result.device_code() - - from loopy.check import check_implemented_domains - assert check_implemented_domains( - kernel, codegen_result.implemented_domains, device_code_str) - - # {{{ handle preambles - - for arg in kernel.args: - seen_dtypes.add(arg.dtype) - for tv in six.itervalues(kernel.temporary_variables): - seen_dtypes.add(tv.dtype) - - preambles = kernel.preambles[:] - - preamble_info = PreambleInfo( - kernel=kernel, - seen_dtypes=seen_dtypes, - seen_functions=seen_functions, - # a set of LoopyTypes (!) - seen_atomic_dtypes=seen_atomic_dtypes, - codegen_state=codegen_state - ) - - preamble_generators = kernel.preamble_generators - for prea_gen in preamble_generators: - preambles.extend(prea_gen(preamble_info)) - - codegen_result = codegen_result.copy(device_preambles=preambles) - - # }}} - - # {{{ Pasting the auxillary functions code to the first device program - # TODO: Currently Sticks all the functions only in the first dev_prog, - # need to identify which function goes with which kernel - new_dev_prog = codegen_result.device_programs[0] - for func in auxillary_functions: - new_dev_prog = new_dev_prog.copy( - ast=Collection([func, new_dev_prog.ast])) - new_device_programs = [new_dev_prog] + codegen_result.device_programs[1:] - codegen_result = codegen_result.copy(device_programs=new_device_programs) - # }}} - # For faster unpickling in the common case when implemented_domains isn't needed. - from loopy.tools import LazilyUnpicklingDict - codegen_result = codegen_result.copy( - implemented_domains=LazilyUnpicklingDict( - codegen_result.implemented_domains)) - - logger.info("%s: generate code: done" % kernel.name) - - return codegen_result - -# }}} - -# vim: foldmethod=marker diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index e2a060e07..1419f4a67 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -231,7 +231,7 @@ def generate_call_code(codegen_state, insn): # }}} - result = codegen_state.ast_builder.emit_multiple_assignment( + result = codegen_state.ast_builder.emit_call( codegen_state, insn) # {{{ tracing diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index ae49d30ee..efda1e8ff 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -150,7 +150,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): a mapping from substitution names to :class:`SubstitutionRule` objects - .. attribute:: auxillary_kernels + .. attribute:: auxiliary_kernels A dictionary of kernels that are to be mapped from their registered function names @@ -201,7 +201,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): temporary_variables={}, iname_to_tag={}, substitutions={}, - auxillary_kernels={}, + auxiliary_kernels={}, function_manglers=[ default_function_mangler, single_arg_function_mangler, @@ -287,7 +287,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): local_sizes=local_sizes, iname_to_tag=iname_to_tag, substitutions=substitutions, - auxillary_kernels=auxillary_kernels, + auxiliary_kernels=auxiliary_kernels, cache_manager=cache_manager, applied_iname_rewrites=applied_iname_rewrites, function_manglers=function_manglers, diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 4335c626d..7ba6e479c 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -4,7 +4,7 @@ from pytools import ImmutableRecord from loopy.diagnostic import LoopyError from collections.abc import Mapping -from loopy.types import LoopyType +from loopy.kernel.instruction import CallInstruction # {{{ argument descriptors @@ -56,6 +56,26 @@ class ArrayArgDescriptor(ArgDescriptor): shape=shape, dim_tags=dim_tags) + def copy(self, dtype=None, mem_scope=None, shape=None, dim_tags=None): + if dtype is None: + dtype = self.dtype + + if mem_scope is None: + mem_scope = self.mem_scope + + if shape is None: + shape = self.shape + + if dim_tags is None: + dim_tags = self.dim_tags + + return ArrayArgDescriptor( + dtype=dtype, + mem_scope=mem_scope, + shape=shape, + dim_tags=dim_tags) + + # }}} @@ -82,33 +102,22 @@ class InKernelCallable(ImmutableRecord): """ - def __init__(self, name, arg_id_to_descr, arg_id_to_keyword): + def __init__(self, name=None, arg_id_to_descr=None, arg_id_to_keyword=None): # {{{ sanity checks if not isinstance(name, str): raise LoopyError("name of a InKernelCallable should be a string") - if not isinstance(arg_id_to_descr, Mapping): - raise LoopyError("arg_id_to_dtype of a InKernelCallable should be a" - "mapping") - if not isinstance(arg_id_to_keyword, Mapping): raise LoopyError("arg_id_to_keyword of a InKernelCallable should be a" "mapping") - for arg_id, dtype in arg_id_to_descr.items(): - if not isinstance(dtype, ArgDescriptor): - raise LoopyError("arg_dtypes must have entries of type LoopyType") - for arg_id, keyword in arg_id_to_keyword.items(): - if not isinstance(dtype, str): + if not isinstance(keyword, str): raise LoopyError("keyword must be a string") - assert len(arg_id_to_descr) == len(arg_id_to_keyword), ("Length of the" - "keyword and the dtype mapping must be same.") - - id_set = frozenset([id for id in arg_id_to_descr]) + id_set = frozenset([id for id in arg_id_to_keyword]) assert len(arg_id_to_descr) == max(id_set) - min(id_set) + 1, ("Either" "repeated id encountered in arg_id_dtype or missed the dtype of some" "of the keyword") @@ -266,14 +275,47 @@ class PassByValueFunction(InKernelCallable): # }}} +# {{{ helper function for CallableKernel + +def get_arg_id_to_keyword_from_kernel(kernel): + + arg_id_to_keyword = {} + written_args = kernel.get_written_variables() + + read_count = 0 + write_count = -1 + + for arg in kernel.args: + if arg.name in written_args: + arg_id_to_keyword[write_count] = arg.name + write_count -= 1 + else: + arg_id_to_keyword[read_count] = arg.name + read_count += 1 + + return arg_id_to_keyword + +# }}} + + # {{{ callable kernel class CallableKernel(InKernelCallable): """ .. attribute:: subkernel + .. attribute:: arg_id_to_keyword + + .. attribute:: arg_id_dtype + """ + def __init__(self, name, subkernel): + + self.name = name + self.subkernel = subkernel + self.arg_id_to_keyword = get_arg_id_to_keyword_from_kernel(subkernel) + def copy(self, name=None, subkernel=None): if name is None: name = self.name @@ -284,6 +326,54 @@ class CallableKernel(InKernelCallable): return CallableKernel(name=name, subkernel=subkernel) + def is_arg_written(self, arg_id): + """ Checks whether a given argument is written + """ + if isinstance(arg_id, int): + arg_id = self.arg_id_to_keyword[arg_id] + + assert isinstance(arg_id, str) + + return arg_id in self.subkernel.get_written_variables() + + def pre_process_check(self, insn): + """ + Before the pre_processing of the parent kernel performs simple checks to + check comptatibility + """ + + # instruction should be a CallInstruction + if not isinstance(insn, CallInstruction): + raise LoopyError("The given instruction should be a CallInstruction") + + # number of assignees should match + if len(insn.assignees) != len([a for a in self.arg_id_to_keyword if a < 0]): + raise LoopyError("The number of assignees do not match") + + # number of parameters should match + if len(insn.expression.parameters) != len([a for a in + self.arg_id_to_keyword if a >= 0]): + raise LoopyError("The number of parameters do not match") + + def with_types(self, arg_id_to_dtype): + # can't exactly figure out the case when the kernel wont agree the + # arg_id_to_dtype in the case CallableKernel + + new_args = [] + arg_dict = self.subkernel.arg_dict + arg_keyword_to_dtype = {} + for id, dtype in arg_id_to_dtype: + if not isinstance(id, str): + id = self.id_to_keyword[id] + arg_keyword_to_dtype[id] = dtype + + for keyword, dtype in arg_keyword_to_dtype.items(): + new_args.append(arg_dict[keyword].copy(dtype=dtype)) + + new_subkernel = self.subkernel.copy(args=new_args) + + return self.copy(subkernel=new_subkernel) + # }}} diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 2c877e360..9ebcf73e9 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -981,9 +981,10 @@ class CallInstruction(MultiAssignmentBase): expression = parse(expression) from pymbolic.primitives import Variable, Subscript - from loopy.symbolic import LinearSubscript + from loopy.symbolic import LinearSubscript, SubArrayRef for assignee in assignees: - if not isinstance(assignee, (Variable, Subscript, LinearSubscript)): + if not isinstance(assignee, (Variable, Subscript, LinearSubscript, + SubArrayRef)): raise LoopyError("invalid lvalue '%s'" % assignee) self.assignees = assignees @@ -1040,7 +1041,7 @@ class CallInstruction(MultiAssignmentBase): return dim_tags_dict - @property() + @property def is_array_call(self): from loopy.symbolic import SubArrayRef for arg in self.assignees + self.expression.parameters: @@ -1060,7 +1061,9 @@ class CallInstruction(MultiAssignmentBase): def make_assignment(assignees, expression, temp_var_types=None, **kwargs): - if len(assignees) > 1 or len(assignees) == 0: + from loopy.symbolic import ArrayCall + if len(assignees) > 1 or len(assignees) == 0 or (isinstance(expression, + ArrayCall)): atomicity = kwargs.pop("atomicity", ()) if atomicity: raise LoopyError("atomic operations with more than one " diff --git a/loopy/symbolic.py b/loopy/symbolic.py index bd669ac59..b0c27798f 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -643,20 +643,30 @@ class SubArrayRef(p.Expression): The subscript whose adress space is to be referenced """ - init_arg_names = ("swept_inames", "subscript") - - def __init__(self, _inames, _subscr): - # {{{ Sanity Checks - if not isinstance(_inames, tuple): - assert isinstance(_inames, p.Variable) - _inames = (_inames,) - assert isinstance(_inames, tuple) - for iname in _inames: + init_arg_names = ("swept_inames", "subscript", "keyword") + + def __init__(self, swept_inames=None, subscript=None, keyword=None): + + # {{{ sanity checks + + if not isinstance(swept_inames, tuple): + assert isinstance(swept_inames, p.Variable) + swept_inames = (swept_inames,) + + assert isinstance(swept_inames, tuple) + + for iname in swept_inames: assert isinstance(iname, p.Variable) - assert isinstance(_subscr, p.Subscript) + assert isinstance(subscript, p.Subscript) + + if keyword is not None: + assert isinstance(keyword, str) + # }}} - self.swept_inames = _inames - self.subscript = _subscr + + self.swept_inames = swept_inames + self.subscript = subscript + self.keyword = keyword def get_begin_subscript(self): starting_inames = [] diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index a08b406f5..b98e14998 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -206,7 +206,7 @@ class ASTBuilderBase(object): def emit_assignment(self, codegen_state, insn): raise NotImplementedError() - def emit_multiple_assignment(self, codegen_state, insn): + def emit_call(self, codegen_state, insn): raise NotImplementedError() def emit_sequential_loop(self, codegen_state, iname, iname_dtype, diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index d07cf5d85..cc3a70ebd 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -835,7 +835,7 @@ class CASTBuilder(ASTBuilderBase): return block_if_necessary(assignments) - def emit_multiple_assignment(self, codegen_state, insn): + def emit_call(self, codegen_state, insn): ecm = codegen_state.expression_to_code_mapper from pymbolic.primitives import Variable @@ -851,9 +851,19 @@ class CASTBuilder(ASTBuilderBase): codegen_state.kernel.get_var_descriptor(a) for a in insn.assignee_var_names()] - par_dtypes = tuple(ecm.infer_type(par) for par in parameters) + par_dtypes = tuple(ecm.infer_type(par) for par in parameters + + insn.assignees) + + if insn.is_array_call: + func_id = "{func}_{insn}".format(func=func_id, + insn=insn.id) + mangle_result = PassByValueFunction( + name=func_id, + arg_dtypes=par_dtypes, + result_dtype=par_dtypes[0]) + else: + mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) - mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) if mangle_result is None: raise RuntimeError("function '%s' unknown--" "maybe you need to register a function mangler?" @@ -861,7 +871,7 @@ class CASTBuilder(ASTBuilderBase): assert mangle_result.arg_dtypes is not None - if mangle_result.target_name == "loopy_make_tuple": + if mangle_result.name == "loopy_make_tuple": # This shortcut avoids actually having to emit a 'make_tuple' function. return self.emit_tuple_assignment(codegen_state, insn) @@ -871,14 +881,15 @@ class CASTBuilder(ASTBuilderBase): dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr for par, par_dtype, tgt_dtype in zip( - parameters, par_dtypes, mangle_result.arg_dtypes)] + parameters+insn.assignees, par_dtypes, mangle_result.arg_dtypes)] from loopy.codegen import SeenFunction codegen_state.seen_functions.add( SeenFunction(func_id, - mangle_result.target_name, + mangle_result.name, mangle_result.arg_dtypes)) + """ from pymbolic import var for i, (a, tgt_dtype) in enumerate( zip(insn.assignees[1:], mangle_result.result_dtypes[1:])): @@ -891,21 +902,21 @@ class CASTBuilder(ASTBuilderBase): ecm(a, PREC_NONE, dtype_to_type_context(self.target, tgt_dtype), tgt_dtype).expr)) + """ from pymbolic import var - result = var(mangle_result.target_name)(*c_parameters) - - # In case of no assignees, we are done - if len(mangle_result.result_dtypes) == 0: - from cgen import ExpressionStatement - return ExpressionStatement( - CExpression(self.get_c_expression_to_code_mapper(), result)) + result = var(mangle_result.name)(*c_parameters) result = ecm.wrap_in_typecast( - mangle_result.result_dtypes[0], + mangle_result.result_dtype, assignee_var_descriptors[0].dtype, result) + if insn.is_array_call: + from cgen import ExpressionStatement + return ExpressionStatement( + CExpression(self.get_c_expression_to_code_mapper(), result)) + lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) from cgen import Assign @@ -978,7 +989,7 @@ class CASTBuilder(ASTBuilderBase): + [ecm.infer_type(ass) for ass in insn.assignees]) if insn.expression.function.name not in ( - codegen_state.kernel.auxillary_kernels): + codegen_state.kernel.auxiliary_kernels): raise RuntimeError("function '%s' unknown--" "maybe you need to register a callable kernel?" % insn.expression.function.name) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index c1f0e77dd..537970b60 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -64,7 +64,7 @@ def register_callable_kernel(parent, function_name, child): assert isinstance(parent, LoopKernel) assert isinstance(child, LoopKernel) assert isinstance(function_name, str) - assert function_name not in parent.auxillary_kernels, ( + assert function_name not in parent.auxiliary_kernels, ( "%s has already been used with some other kernel. One" "function can only be associated with a single kernel" % ( function_name)) @@ -72,10 +72,10 @@ def register_callable_kernel(parent, function_name, child): # }}} # FIXME: needs checks whether the kernels are compliant - new_auxillary_kernels = parent.auxillary_kernels - new_auxillary_kernels[function_name] = child + new_auxiliary_kernels = parent.auxiliary_kernels + new_auxiliary_kernels[function_name] = child - return parent.copy(auxillary_kernels=new_auxillary_kernels) + return parent.copy(auxiliary_kernels=new_auxiliary_kernels) # }}} -- GitLab From c9f4d6fb513a76e742a7647cb6f6e1117409838d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 7 Mar 2018 02:17:40 -0600 Subject: [PATCH 108/116] [ci skip] Removed preamble.py --- loopy/kernel/preamble.py | 74 ---------------------------------------- 1 file changed, 74 deletions(-) delete mode 100644 loopy/kernel/preamble.py diff --git a/loopy/kernel/preamble.py b/loopy/kernel/preamble.py deleted file mode 100644 index 9109ac416..000000000 --- a/loopy/kernel/preamble.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2018 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -from pytools import ImmutableRecord - - - - -class PreambleGenerator(ImmutableRecord): - """ A class which is intended to collect all the preambles. This would then - contain the function `generate()`, which would in the end geenrate the final - preamble string. - """ - - def __init__(self, target, premables): - pass - - def generate(self): - pass - - -class PreambleBase(ImmutableRecord): - """ A base class for all the preambles, enountered in Loopy. - """ - def __init__(self): - pass - - -class EmptyPreamble(PreambleBase): - """ This is just the Empty preamble which can be passed by a function whenever it - does need any preambles. For example for some function like `sin` in an OpenCL - code, we do not need any preambles - """ - def __init__(self): - super(EmptyPreamble).__init__(self, - target=None, - preamble=None) - - def generate(self): - return "" - - -class TargetPreamble(PreambleBase): - def __init__(self): - pass - - - -class FunctionPreamble(PreambleBase): - """ This is the preamble for functions. For example while generating a C++ code, - in order to invoke the math function we need to add `#include ` to the - preamble. Such sets of preamble fall in the set of math preambles - """ - def __init__(self, target, preamble_string): - pass -- GitLab From a00868c17f38ed43eaa5899d4fed99ddb1374f75 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 10 Mar 2018 18:59:13 -0600 Subject: [PATCH 109/116] Have the passByValueFunction somewhat working now. --- loopy/__init__.py | 11 +- loopy/kernel/__init__.py | 111 +++++++------ loopy/kernel/function_interface.py | 235 +++++++++++++++++++-------- loopy/kernel/instruction.py | 4 +- loopy/library/function.py | 33 ---- loopy/library/random123.py | 6 + loopy/library/reduction.py | 8 + loopy/symbolic.py | 32 +++- loopy/target/__init__.py | 4 +- loopy/target/c/__init__.py | 203 +++++++---------------- loopy/target/c/codegen/expression.py | 34 ++-- loopy/target/cuda.py | 67 ++++++++ loopy/target/opencl.py | 125 +++++++------- loopy/target/pyopencl.py | 60 ++++--- loopy/type_inference.py | 26 +-- 15 files changed, 532 insertions(+), 427 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 9f3f99903..f5d7f20e2 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -33,14 +33,12 @@ from loopy.diagnostic import LoopyError, LoopyWarning # {{{ imported user interface -from loopy.library.function import ( - default_function_mangler, single_arg_function_mangler) - from loopy.kernel.instruction import ( memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, InstructionBase, MultiAssignmentBase, Assignment, ExpressionInstruction, CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction) + from loopy.kernel.data import ( auto, KernelArgument, @@ -48,6 +46,9 @@ from loopy.kernel.data import ( temp_var_scope, TemporaryVariable, SubstitutionRule) +from loopy.kernel.function_interface import (InKernelCallable, + CommonReturnTypeCallable, SpecificReturnTypeCallable) + from loopy.kernel import LoopKernel, kernel_state from loopy.kernel.tools import ( get_dot_dependency_graph, @@ -159,13 +160,13 @@ __all__ = [ "CallInstruction", "CInstruction", "NoOpInstruction", "BarrierInstruction", + "InKernelCallable", "SpecificReturnTypeCallable", "CommonReturnTypeCallable", + "KernelArgument", "ValueArg", "GlobalArg", "ConstantArg", "ImageArg", "temp_var_scope", "TemporaryVariable", "SubstitutionRule" - "default_function_mangler", "single_arg_function_mangler", - "make_kernel", "UniqueName", "register_reduction_parser", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index efda1e8ff..be43fafdd 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -35,14 +35,57 @@ import re from pytools import UniqueNameGenerator, generate_unique_names -from loopy.library.function import ( - default_function_mangler, - single_arg_function_mangler) - from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted from loopy.diagnostic import StaticValueFindingError +from loopy.kernel.function_interface import InKernelCallable + +# {{{ maybe need to remove it, but putting it over here for the moment + + +def default_callables(): + from loopy.library.reduction import reduction_callables + + tuple_callable = {"make_tuple": MakeTupleCallable()} + # TODO: the reduction_callables is empty for now. + # Will change it accoarding to the current system + default_callables = {**reduction_callables(), **tuple_callable} + + return default_callables + + +class MakeTupleCallable(InKernelCallable): + def __init__(self, arg_id_to_dtype=None): + super(MakeTupleCallable, self).__init__(name="loopy_make_tuple") + self.arg_id_to_dtype = arg_id_to_dtype + + def copy(self, arg_id_to_dtype): + if arg_id_to_dtype is None: + arg_id_to_dtype = self.arg_id_to_dtype + + return MakeTupleCallable(self.name, self.arg_id_to_dtype) + + def with_types(self, arg_id_to_dtype): + # there's nothing to check over here, since no other class inherits it this + # will be safe just for `make_tuple` + new_arg_id_to_dtype = arg_id_to_dtype.copy() + for id, dtype in arg_id_to_dtype.items(): + if id >= 0: + # subtracting one because the input 0 maps to the output -1 and so + # on. + new_arg_id_to_dtype[-id-1] = dtype + + return self.copy(new_arg_id_to_dtype), new_arg_id_to_dtype + + def get_target_specific_name(self, target): + return self.name + + def get_preamble(self): + return "" + +# }}} + # {{{ unique var names @@ -202,10 +245,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): iname_to_tag={}, substitutions={}, auxiliary_kernels={}, - function_manglers=[ - default_function_mangler, - single_arg_function_mangler, - ], + callables=default_callables(), symbol_manglers=[], iname_slab_increments={}, @@ -290,7 +330,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): auxiliary_kernels=auxiliary_kernels, cache_manager=cache_manager, applied_iname_rewrites=applied_iname_rewrites, - function_manglers=function_manglers, + callables=callables, symbol_manglers=symbol_manglers, index_dtype=index_dtype, options=options, @@ -303,46 +343,25 @@ class LoopKernel(ImmutableRecordWithoutPickling): # }}} - # {{{ function mangling + # {{{ specializing a call - def mangle_function(self, identifier, arg_dtypes, ast_builder=None): + def get_specialized_callable(self, identifier, arg_id_to_dtype, + ast_builder=None): if ast_builder is None: ast_builder = self.target.get_device_ast_builder() - - manglers = ast_builder.function_manglers() + self.function_manglers - - for mangler in manglers: - mangle_result = mangler(self, identifier, arg_dtypes) - if mangle_result is not None: - from loopy.kernel.function_interface import InKernelCallable - if isinstance(mangle_result, InKernelCallable): - assert len(mangle_result.arg_dtypes) == len(arg_dtypes) - return mangle_result - - assert isinstance(mangle_result, tuple) - - from warnings import warn - warn("'%s' returned a tuple instead of a CallMangleInfo instance. " - "This is deprecated." % mangler.__name__, - DeprecationWarning) - ''' - if len(mangle_result) == 2: - result_dtype, target_name = mangle_result - return CallMangleInfo( - target_name=target_name, - result_dtypes=(result_dtype,), - arg_dtypes=None) - - elif len(mangle_result) == 3: - result_dtype, target_name, actual_arg_dtypes = mangle_result - return CallMangleInfo( - target_name=target_name, - result_dtypes=(result_dtype,), - arg_dtypes=actual_arg_dtypes) - else: - raise ValueError("unexpected size of tuple returned by '%s'" - % mangler.__name__) - ''' + # TODO: This is bad.. everytime this wants to specializze a call it tries to + # rebuild this dictionary. Not Happening in my watch ;) Will replace this! + # Maybe we need to make an attribute which would store it. Let see + callable_dict = {**self.callables, **ast_builder.callables(self)} + + if identifier in callable_dict: + guess_callable = callable_dict[identifier] + specialized_callable = guess_callable.with_types(arg_id_to_dtype) + + if specialized_callable is not None: + # the specialized callable should be a tuple + specialized_callable, new_arg_id_to_dtype = specialized_callable + return specialized_callable, new_arg_id_to_dtype return None diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 7ba6e479c..3ff28c4b0 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -1,10 +1,12 @@ +from __future__ import division, absolute_import + import numpy as np from pytools import ImmutableRecord from loopy.diagnostic import LoopyError -from collections.abc import Mapping from loopy.kernel.instruction import CallInstruction +from loopy.types import NumpyType # {{{ argument descriptors @@ -19,40 +21,35 @@ class ArgDescriptor(ImmutableRecord): """ def __init__(self, - dtype=None, mem_scope=None, shape=None, dim_tags=None): super(ArgDescriptor).__init__(self, - dtype=dtype, + mem_scope=mem_scope, shape=shape, dim_tags=dim_tags) class ValueArgDescriptor(ArgDescriptor): """ - .. attribute dtype """ - def __init__(self, dtype): - super(ValueArgDescriptor).__init__(self, - dtype=dtype) + def __init__(self): + super(ValueArgDescriptor).__init__(self) class ArrayArgDescriptor(ArgDescriptor): """ - .. attribute:: dtype .. attribute:: mem_scope .. attribute:: shape .. attribute:: dim_tags """ def __init__(self, - dtype=None, mem_scope=None, shape=None, dim_tags=None): super(ArgDescriptor).__init__(self, - dtype=dtype, + mem_scope=mem_scope, shape=shape, dim_tags=dim_tags) @@ -88,54 +85,24 @@ class InKernelCallable(ImmutableRecord): The name of the callable which can be encountered within a kernel. - .. attrbute:: arg_id_to_descr - - A mapping from the id to dtypes of the argument - - .. attribute:: arg_id_to_keyword - - A mapping from the id to the keyword of the argument. - .. note:: Negative ids in the mapping attributes indicate the result arguments """ - def __init__(self, name=None, arg_id_to_descr=None, arg_id_to_keyword=None): + def __init__(self, name=None): # {{{ sanity checks if not isinstance(name, str): raise LoopyError("name of a InKernelCallable should be a string") - if not isinstance(arg_id_to_keyword, Mapping): - raise LoopyError("arg_id_to_keyword of a InKernelCallable should be a" - "mapping") - - for arg_id, keyword in arg_id_to_keyword.items(): - if not isinstance(keyword, str): - raise LoopyError("keyword must be a string") - - id_set = frozenset([id for id in arg_id_to_keyword]) - assert len(arg_id_to_descr) == max(id_set) - min(id_set) + 1, ("Either" - "repeated id encountered in arg_id_dtype or missed the dtype of some" - "of the keyword") - # }}} self.name = name - self.arg_id_to_descr = arg_id_to_descr - self.arg_id_to_keyword = arg_id_to_keyword - - # creating a inverse mapping which would be used a lot in the implementation - self.keyword_to_arg_id = {} - for arg_id, keyword in arg_id_to_keyword.items(): - self.keyword_to_arg_id[keyword] = arg_id - super(InKernelCallable).__init__(name=name, - arg_id_to_descr=arg_id_to_descr, - arg_id_to_keyword=arg_id_to_keyword) + super(InKernelCallable, self).__init__(name=name) def copy(self, name=None): if name is None: @@ -160,13 +127,6 @@ class InKernelCallable(ImmutableRecord): Any argument information exists both by its positional and its keyword identifier. """ - # An example for this would be interpreting the function call. - # The example func(1+j, 1) should be converted to func(1+j, 1+0j) - # Now what I don;t understand is why would the new_self. - # So this is a general function which says implement so and so function - # with so and so dtypes. - # So something like double sin(double x) is the function, then with - # types would be a transformation which would raise NotImplementedError() @@ -216,7 +176,11 @@ class InKernelCallable(ImmutableRecord): def is_ready_for_code_gen(self): - return False + raise NotImplementedError() + + def get_target_name(self, target): + + raise NotImplementedError() # {{{ code generation @@ -225,6 +189,11 @@ class InKernelCallable(ImmutableRecord): """ pass + def get_target_name(self, target): + # need to figure out from the old Function Mangler + pass + + def emit_call(self, target): pass @@ -235,39 +204,95 @@ class InKernelCallable(ImmutableRecord): and self.arg_id_to_descr == other.arg_id_to_descr and self.arg_id_to_keyword == other.arg_id_to_keyword) - def __hash__(self, other): - return hash((self.name, self.arg_id_to_descr, self.arg_id_to_keyword)) + def __hash__(self): + return hash((self.name, )) # }}} -# {{{ pass by value functions +# {{{ generic callable class -class PassByValueFunction(InKernelCallable): - """ A class for expressing the c-math library calls. All the functions are - assumed of the following function signature: z = f(a, b, c, d, ...) depending on - the arity. + +class CommonReturnTypeCallable(InKernelCallable): + """ A class of generic functions which have the following properties: + - Single return value + - Return type of the callable is a common dtype to all the input arguments + to the callable .. attribute:: name The name of the function as would be encountered in loopy. + ..attribute:: specialized_dtype + + The dtype for which the function has been setup to generate code and + premables. For example, the function `sin` can be specialized to either one + of the following `float sin(float x)` or `double sin(double x)`. This is not + usually expected to be an input as this removed the generality of the + callable. + + ..attribute:: kinds_allowed + + The extent upto which the function can be generalized upto. For example + `sin(x)` cannot have complex types as its specialized type. + + ..attribute:: arity + + The number of inputs that are to be given to the function + """ - def __init__(self, name=None, arg_dtypes=None, result_dtype=None): + def __init__(self, name=None, specialized_dtype=None, kinds_allowed=None, + arity=None): - self.name = name - self.arg_dtypes = arg_dtypes + super(CommonReturnTypeCallable, self).__init__(name=name) - if result_dtype is None: - result_dtype = np.find_common_type( - [], [dtype.numpy_dtype for dtype in arg_dtypes]) + self.specialized_dtype = specialized_dtype + self.kinds_allowed = kinds_allowed + self.arity = arity - self.result_dtype = result_dtype + def copy(self, specialized_dtype=None): + if specialized_dtype is None: + specialized_dtype = self.specialized_dtype - def emit_call(self, target): - raise NotImplementedError("PassByValueFUnction is always encountered" - "through an assignment") + return type(self)(self.name, specialized_dtype, + self.kinds_allowed, self.arity) + + def with_types(self, arg_id_to_dtype): + + specialized_dtype = np.find_common_type([], [dtype.numpy_dtype + for id, dtype in arg_id_to_dtype.items() if id >= 0]) + + if self.specialized_dtype is not None and (specialized_dtype != + self.specialized_dtype): + from loopy.warnings import warn + warn("Trying to change the type of the already set function." + "-- maybe use a different class instance?") + + new_arg_id_to_dtype = arg_id_to_dtype.copy() + # checking the compliance of the arg_id_to_dtype + + if -1 not in arg_id_to_dtype: + # return type was not know earlier, now setting it to the common type + new_arg_id_to_dtype[-1] = NumpyType(specialized_dtype) + + if self.arity+1 == len(new_arg_id_to_dtype) and (specialized_dtype.kind in + self.kinds_allowed): + # the function signature matched with the current instance. + # returning the function and the new_arg_id_to_dtype + for i in range(self.arity): + new_arg_id_to_dtype[i] = NumpyType(specialized_dtype) + + return (self.copy(specialized_dtype=specialized_dtype), + new_arg_id_to_dtype) + + return None + + def is_ready_for_code_gen(self): + return self.specilized_dtype is not None + + def get_target_specific_name(self, target): + raise NotImplementedError() def get_preamble(self, target): raise NotImplementedError() @@ -275,6 +300,82 @@ class PassByValueFunction(InKernelCallable): # }}} +# {{{ specific type callable class + + +class SpecificReturnTypeCallable(InKernelCallable): + """ A super class for the funcitons which cannot be listed as generic + functions. These types of Callables support explicity mentioning of the + arguments and result dtypes. + + .. attribute:: name + + The name of the function as would be encountered in loopy. + + .. attribute:: arg_id_to_dtype + + The dtype pattern of the arguments which is supposed to be used for checking + the applicability of this function in a given scenario. + """ + + def __init__(self, name=None, arg_id_to_dtype=None): + + super(SpecificReturnTypeCallable, self).__init__(name=name) + + if arg_id_to_dtype is None: + LoopyError("The function signature is incomplete without the" + "`arg_id_to_dtype`") + self.arg_id_to_dtype = arg_id_to_dtype + + def with_types(self, arg_id_to_dtype): + + # Checking the number of inputs + if len([id for id in arg_id_to_dtype if id >= 0]) != len( + [id for id in self.arg_id_to_dtype if id >= 0]): + # the number of input arguments do not match + return None + + # Checking the input dtypes + for id, dtype in arg_id_to_dtype.items(): + if id in self.arg_id_to_dtype and self.arg_id_to_dtype[id] == dtype: + # dtype matched with the one given in the input + pass + else: + # did not match with the function signature and hence returning + # None + return None + + # Setting the output if not present + new_arg_id_to_dtype = arg_id_to_dtype.copy() + for id, dtype in self.arg_id_to_dtype: + if id < 0: + # outputs + if id in new_arg_id_to_dtype and new_arg_id_to_dtype[id] != dtype: + # the output dtype had been supplied but did not match with the + # one in the function signature + return None + + new_arg_id_to_dtype[id] = dtype + + # Finally returning the types + return self.copy(), new_arg_id_to_dtype + + def is_ready_for_code_gen(self): + # everything about the function is determined at the constructor itself, + # hence always redy for codegen + return True + + def get_target_specific_name(self, target): + # defaults to the name of the function in Loopy. May change this specific to + # a target by inheriting this class and overriding this function. + return self.name + + def get_preamble(self, target): + return "" + +# }}} + + # {{{ helper function for CallableKernel def get_arg_id_to_keyword_from_kernel(kernel): diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 9ebcf73e9..c75a804ae 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -964,8 +964,8 @@ class CallInstruction(MultiAssignmentBase): forced_iname_deps_is_final=forced_iname_deps_is_final) from pymbolic.primitives import Call - from loopy.symbolic import Reduction - if not isinstance(expression, (Call, Reduction)) and expression is not None: + from loopy.symbolic import Reduction, ArrayCall + if not isinstance(expression, (ArrayCall, Call, Reduction)) and expression is not None: raise LoopyError("'expression' argument to CallInstruction " "must be a function call") diff --git a/loopy/library/function.py b/loopy/library/function.py index 9d557ac9f..6582ba56f 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -23,37 +23,4 @@ THE SOFTWARE. """ -def default_function_mangler(kernel, name, arg_dtypes): - from loopy.library.reduction import reduction_function_mangler - - manglers = [reduction_function_mangler, tuple_function_mangler] - for mangler in manglers: - result = mangler(kernel, name, arg_dtypes) - if result is not None: - return result - - return None - - -def single_arg_function_mangler(kernel, name, arg_dtypes): - if len(arg_dtypes) == 1: - dtype, = arg_dtypes - - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo(name, (dtype,), (dtype,)) - - return None - - -def tuple_function_mangler(kernel, name, arg_dtypes): - if name == "make_tuple": - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="loopy_make_tuple", - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - return None - - # vim: foldmethod=marker diff --git a/loopy/library/random123.py b/loopy/library/random123.py index b8633114d..9d971c376 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -180,7 +180,13 @@ def random123_preamble_generator(preamble_info): )) +def random123_callables(kernel): + # This is just to test whether the rest of the code is working + return {} + + def random123_function_mangler(kernel, name, arg_dtypes): + pass try: rng_variant = FUNC_NAMES_TO_RNG[name] except KeyError: diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 0e5a093b7..11f3007f8 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -422,6 +422,14 @@ def parse_reduction_op(name): # }}} +def reduction_callables(): + return {} + # TODO: So what's the problem over here? + # I can generate the callables for everythin except max and min, + # A long time solution should be to have a type for the array dtypes + pass + + def reduction_function_mangler(kernel, func_id, arg_dtypes): if isinstance(func_id, ArgExtOp): from loopy.target.opencl import CTarget diff --git a/loopy/symbolic.py b/loopy/symbolic.py index b0c27798f..3d01b5dfb 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -108,7 +108,7 @@ class IdentityMapperMixin(object): def map_array_call(self, expr, *args): return ArrayCall(expr.function, - expr.parameters) + expr.parameters, expr.kw_parameters) def map_sub_array_ref(self, expr, *args): return SubArrayRef(expr.swept_inames, expr.subscript) @@ -708,26 +708,32 @@ class SubArrayRef(p.Expression): mapper_method = intern("map_sub_array_ref") -class ArrayCall(p.Call): +class ArrayCall(p.CallWithKwargs): """Represents an function call over an array across :attr: `inames`. """ - def __init__(self, function, parameters): + def __init__(self, function, parameters, kw_parameters=None): + + # {{{ sanity checks - # {{{ Input Sanity Checks assert isinstance(function, p.Variable) assert isinstance(parameters, tuple) for par in parameters: assert isinstance(par, SubArrayRef) + # }}} self.function = function self.parameters = parameters + self.kw_parameters = kw_parameters def stringifier(self): return StringifyMapper + def __hash__(self): + return hash((self.function, self.parameters, self.kw_parameters)) + mapper_method = intern("map_array_call") @@ -1174,7 +1180,8 @@ class FunctionToPrimitiveMapper(IdentityMapper): def map_call(self, expr): from loopy.library.reduction import parse_reduction_op - # {{{ Handling ArrayCalls + # {{{ handling array calls + encountered_sub_array_ref = False for par in expr.parameters: if isinstance(par, SubArrayRef): @@ -1182,6 +1189,7 @@ class FunctionToPrimitiveMapper(IdentityMapper): break if encountered_sub_array_ref: return ArrayCall(expr.function, expr.parameters) + # }}} if not isinstance(expr.function, p.Variable): @@ -1242,6 +1250,20 @@ class FunctionToPrimitiveMapper(IdentityMapper): else: return IdentityMapper.map_call(self, expr) + def map_call_with_kwargs(self, expr): + + # {{{ handling array calls + + for par in expr.parameters + expr.kw_parameters.values(): + if isinstance(par, SubArrayRef): + return ArrayCall(expr.function, expr.parameters, expr.kw_parameters) + + # }}} + + raise NotImplementedError("CallWithKwargs is only supported for ArrayCalls") + + + # {{{ customization to pymbolic parser diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index b98e14998..1885b63f8 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -150,8 +150,8 @@ class ASTBuilderBase(object): # {{{ library - def function_manglers(self): - return [] + def callables(self, kernel): + return {} def symbol_manglers(self): return [] diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index cc3a70ebd..60c2ea940 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -27,9 +27,10 @@ THE SOFTWARE. import six import numpy as np # noqa -from loopy.kernel.function_interface import PassByValueFunction, InKernelCallable +from loopy.kernel.function_interface import (CommonReturnTypeCallable, + SpecificReturnTypeCallable) from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder -from loopy.diagnostic import LoopyError, LoopyTypeError +from loopy.diagnostic import LoopyError from cgen import Pointer, NestedDeclarator, Block from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE @@ -78,6 +79,10 @@ class DTypeRegistryWrapper(object): # {{{ preamble generator def _preamble_generator(preamble_info): + # TODO: + # No need for this! + # This will go into the generate premble for the functions + return c_funcs = set(func.c_name for func in preamble_info.seen_functions) if "int_floor_div" in c_funcs: yield ("05_int_floor_div", """ @@ -356,70 +361,39 @@ def c_symbol_mangler(kernel, name): # {{{ function mangler -def c_math_mangler(target, name, arg_dtypes, modify_name=True): - # Function mangler for math functions defined in C standard - # Convert abs, min, max to fabs, fmin, fmax. - # If modify_name is set to True, function names are modified according to - # floating point types of the arguments (e.g. cos(double), cosf(float)) - # This should be set to True for C and Cuda, False for OpenCL - if not isinstance(name, str): - return None +class CMathCallable(CommonReturnTypeCallable): + def get_target_specific_name(self, target): + if not self.is_ready_for_codegen(): + raise LoopyError("Trying to generate ") + assert isinstance(target, CTarget) - if name in ["abs", "min", "max"]: - name = "f" + name + if self.name in ["abs", "max", "min"]: + target_name = "f" + self.name - # unitary functions - if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", - "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] - and len(arg_dtypes) == 1 - and arg_dtypes[0].numpy_dtype.kind == "f"): + return target_name - dtype = arg_dtypes[0].numpy_dtype + def get_preamble(self, target): + assert isinstance(target, CTarget) - if modify_name: - if dtype == np.float64: - pass # fabs - elif dtype == np.float32: - name = name + "f" # fabsf - elif dtype == np.float128: - name = name + "l" # fabsl - else: - raise LoopyTypeError("%s does not support type %s" % (name, dtype)) - - return PassByValueFunction( - name=name, - arg_dtypes=arg_dtypes, - result_dtype=arg_dtypes[0]) - - # binary functions - if (name in ["fmax", "fmin"] - and len(arg_dtypes) == 2): - - dtype = np.find_common_type( - [], [dtype.numpy_dtype for dtype in arg_dtypes]) - - if dtype.kind == "c": - raise LoopyTypeError("%s does not support complex numbers") - - elif dtype.kind == "f": - if modify_name: - if dtype == np.float64: - pass # fmin - elif dtype == np.float32: - name = name + "f" # fminf - elif dtype == np.float128: - name = name + "l" # fminl - else: - raise LoopyTypeError("%s does not support type %s" - % (name, dtype)) - - result_dtype = NumpyType(dtype) - return PassByValueFunction( - name=name, - arg_dtypes=2*(result_dtype,), - result_dtypes=result_dtype) + return r'#include "math.h"' + + +def collect_c_generic_callables(): + unary_functions = ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", + "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] + + binary_functions = ["max", "min"] + + function_dict = {} + + for func in unary_functions: + function_dict[func] = CMathCallable(name=func, kinds_allowed=['f'], arity=1) + + for func in binary_functions: + function_dict[func] = CMathCallable(name=func, kinds_allowed=['f'], arity=2) + + return function_dict - return None # }}} @@ -427,11 +401,10 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): class CASTBuilder(ASTBuilderBase): # {{{ library - def function_manglers(self): + def callables(self, kernel): return ( - super(CASTBuilder, self).function_manglers() + [ - c_math_mangler - ]) + {**super(CASTBuilder, self).callables(kernel), + **collect_c_generic_callables()}) def symbol_manglers(self): return ( @@ -836,6 +809,10 @@ class CASTBuilder(ASTBuilderBase): return block_if_necessary(assignments) def emit_call(self, codegen_state, insn): + # TODO: Maybe we want this whole thing to be stuffed down within the + # Callable class definition. + 1/0 + # haha.. can't come over here till the whole thing has been handled ecm = codegen_state.expression_to_code_mapper from pymbolic.primitives import Variable @@ -857,10 +834,20 @@ class CASTBuilder(ASTBuilderBase): if insn.is_array_call: func_id = "{func}_{insn}".format(func=func_id, insn=insn.id) - mangle_result = PassByValueFunction( - name=func_id, - arg_dtypes=par_dtypes, - result_dtype=par_dtypes[0]) + # TODO: Since dont have an interface now for the kernel. Cheating ;) by + # having my own funciton signature created at the momnent + # Note that when you do the sort of handling make sure you also have + # something to handle the keywrods obtained over here. + # Wow this is too baaad(currently).... + # Make changes so that the assigneees have been assignees negative ids. + arg_id_to_dtype = {} + for id, dtype in enumerate(par_dtypes): + arg_id_to_dtype[id] = dtype + + mangle_result = SpecificReturnTypeCallable( + name=func_id, + arg_dtypes=par_dtypes, + result_dtype=par_dtypes[0]) else: mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) @@ -972,84 +959,6 @@ class CASTBuilder(ASTBuilderBase): from cgen import If return If(condition_str, ast) - def emit_array_call(self, codegen_state, insn): - ecm = codegen_state.expression_to_code_mapper - - from pymbolic.mapper.stringifier import PREC_NONE - - func_id = insn.expression.function.name + "_" + str(insn.id) - parameters = insn.expression.parameters - assignments = insn.assignees - - assignee_var_descriptors = [ - codegen_state.kernel.get_var_descriptor(a) - for a in insn.assignee_var_names()] - - par_dtypes = tuple([ecm.infer_type(par) for par in parameters] - + [ecm.infer_type(ass) for ass in insn.assignees]) - - if insn.expression.function.name not in ( - codegen_state.kernel.auxiliary_kernels): - raise RuntimeError("function '%s' unknown--" - "maybe you need to register a callable kernel?" - % insn.expression.function.name) - - # FIXME: This has to be interpreted from the kernel - # Assumption: That the compatibilty checks would be done by - # `register_knl` - mangle_result = InKernelCallable( - name=1/0, - result_dtypes=par_dtypes, - arg_dtypes=par_dtypes) - - assert mangle_result.arg_dtypes is not None - - from loopy.expression import dtype_to_type_context - - c_parameters = [ - ecm(par, PREC_NONE, - dtype_to_type_context(self.target, tgt_dtype), - tgt_dtype).expr - for par, par_dtype, tgt_dtype in zip( - parameters+assignments, par_dtypes, mangle_result.arg_dtypes)] - - from loopy.codegen import SeenFunction - codegen_state.seen_functions.add( - SeenFunction(func_id, - mangle_result.target_name, - mangle_result.arg_dtypes)) - - from pymbolic import var - for i, (a, tgt_dtype) in enumerate( - zip(insn.assignees[1:], mangle_result.result_dtypes[1:])): - if tgt_dtype != ecm.infer_type(a): - raise LoopyError("type mismatch in %d'th (1-based) left-hand " - "side of instruction '%s'" % (i+1, insn.id)) - c_parameters.append( - # TODO Yuck: The "where-at function": &(...) - var("&")( - ecm(a, PREC_NONE, - dtype_to_type_context(self.target, tgt_dtype), - tgt_dtype).expr)) - - from pymbolic import var - result = var(mangle_result.target_name)(*c_parameters) - - # In case of no assignees, we are done - if len(mangle_result.result_dtypes) == 0: - from cgen import ExpressionStatement - return ExpressionStatement( - CExpression(self.get_c_expression_to_code_mapper(), result)) - - result = ecm.wrap_in_typecast( - mangle_result.result_dtypes[0], - assignee_var_descriptors[0].dtype, - result) - - from cgen import ExpressionStatement - return ExpressionStatement( - CExpression(self.get_c_expression_to_code_mapper(), result)) - # }}} def process_ast(self, node): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index b35b9b26d..3a467fef3 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -40,7 +40,7 @@ from pymbolic import var from loopy.expression import dtype_to_type_context from loopy.type_inference import TypeInferenceMapper -from loopy.kernel.function_interface import PassByValueFunction +from loopy.kernel.function_interface import InKernelCallable from loopy.diagnostic import LoopyError, LoopyWarning from loopy.tools import is_integer @@ -436,30 +436,37 @@ class ExpressionToCExpressionMapper(IdentityMapper): if isinstance(identifier, Variable): identifier = identifier.name + arg_id_to_dtype = {} + for id, par in enumerate(expr.parameters): + arg_id_to_dtype[id] = self.infer_type(par) + par_dtypes = tuple(self.infer_type(par) for par in expr.parameters) processed_parameters = None - mangle_result = self.kernel.mangle_function( - identifier, par_dtypes, - ast_builder=self.codegen_state.ast_builder) + specialized_function, new_arg_id_to_dtype = ( + self.kernel.get_specialized_callable( + identifier, arg_id_to_dtype, + ast_builder=self.codegen_state.ast_builder)) - if mangle_result is None: + if specialized_function is None: raise RuntimeError("function '%s' unknown--" - "maybe you need to register a function mangler?" + "maybe you need to register a function?" % identifier) - if not isinstance(mangle_result, PassByValueFunction): + if not isinstance(specialized_function, InKernelCallable): raise LoopyError("functions with more or fewer than one return value " "may not be used in an expression") - if mangle_result.arg_dtypes is not None: + new_par_dtypes = tuple(new_arg_id_to_dtype[id] for id in + sorted(new_arg_id_to_dtype) if id >= 0) + if new_arg_id_to_dtype is not None: processed_parameters = tuple( self.rec(par, dtype_to_type_context(self.kernel.target, tgt_dtype), tgt_dtype) for par, par_dtype, tgt_dtype in zip( - expr.parameters, par_dtypes, mangle_result.arg_dtypes)) + expr.parameters, par_dtypes, new_par_dtypes)) else: # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to @@ -476,13 +483,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): "return CallMangleInfo.arg_dtypes" % identifier, LoopyWarning) - from loopy.codegen import SeenFunction - self.codegen_state.seen_functions.add( - SeenFunction(identifier, - mangle_result.name, - mangle_result.arg_dtypes or par_dtypes)) + self.codegen_state.seen_functions.add(specialized_function) - return var(mangle_result.name)(*processed_parameters) + return var(specialized_function.get_target_specific_name(self.kernel.target))( + *processed_parameters) # {{{ deal with complex-valued variables diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 027f27838..da8c04382 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -112,6 +112,73 @@ def _register_vector_types(dtype_registry): # {{{ function mangler + +_CUDA_SIMPLE_MULTI_ARG_FUNCTIONS = { + "atan2": 2 + } + + +def CudaMathCallable(CommonReturnTypeCallable): + def get_target_specific_name(self, target): + if not self.is_ready_for_codegen(): + raise LoopyError("Trying to generate ") + assert isinstance(target, CudaTarget) + + if self.name in ["abs", "max", "min"]: + target_name = "f" + self.name + + dtype = self.specialized_dtype + + if dtype == np.float64: + pass # fabs + elif dtype == np.float32: + target_name = target_name + "f" # fabsf + elif dtype == np.float128: + target_name = target_name + "l" # fabsl + else: + raise LoopyTypeError("%s does not support type %s" % (name, dtype)) + + return target_name + + def get_preambles(self, target): + assert isinstance(target, CudaTarget) + + +def collect_cuda_generic_callables(collectible_dict): + unary_functions = ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", + "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] + + binary_functions = ["max", "min"] + + for func in unary_functions: + if func in collectible_dict: + raise LoopyError("Cannot map the same name to different generic function" + "types") + + collectible_dict[func] = CudaMathCallable(name=func, kinds_allowed=['f'], + arity=1) + + for func in binary_functions: + if func in collectible_dict: + raise LoopyError("Cannot map the same name to different generic function" + "types") + + collectible_dict[func] = CudaMathCallable(name=func, kinds_allowed=['f'], + arity=2) + + for func, num_args in _CUDA_SIMPLE_MULTI_ARG_FUNCTIONS.items(): + if func in collectible_dict: + raise LoopyError("Cannot map the same name to different generic function" + "types") + + collectible_dict[func] = CudaMathCallable(name=func, kinds_allowed=['f'], + arity=num_args) + + # FIXME: dot is yet to be implemented + + return collectible_dict + + def cuda_function_mangler(kernel, name, arg_dtypes): if not isinstance(name, str): return None diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index bb1b59c8b..2e4b23358 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -31,13 +31,12 @@ from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from pytools import memoize_method from loopy.diagnostic import LoopyError from loopy.types import NumpyType -from loopy.target.c import DTypeRegistryWrapper, c_math_mangler +from loopy.target.c import DTypeRegistryWrapper from loopy.kernel.data import temp_var_scope -from loopy.kernel.function_interface import PassByValueFunction +from loopy.kernel.function_interface import (CommonReturnTypeCallable, + SpecificReturnTypeCallable) from pymbolic import var -from functools import partial - # {{{ dtype registry wrappers @@ -140,11 +139,12 @@ def _register_vector_types(dtype_registry): # }}} -# {{{ function mangler +# {{{ OpenCL callables _CL_SIMPLE_MULTI_ARG_FUNCTIONS = { + "dot": 2, "clamp": 3, - "atan2": 2, + "atan2": 2 } @@ -166,61 +166,63 @@ VECTOR_LITERAL_FUNCS = dict( ) -def opencl_function_mangler(kernel, name, arg_dtypes): - if not isinstance(name, str): - return None +class CLMathCallable(CommonReturnTypeCallable): + def get_target_specific_name(self, target): + if not self.is_ready_for_codegen(): + raise LoopyError("Trying to generate ") + assert isinstance(target, OpenCLTarget) + + if self.name in ["abs", "max", "min"]: + target_name = "f" + self.name + + return target_name + + def get_preamble(self, target): + return "" + + +class CLSpecificCallable(SpecificReturnTypeCallable): + pass + + +def collect_cl_generic_callables(): + unary_functions = ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", + "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] - # OpenCL has min(), max() for integer types - if name in ["max", "min"] and len(arg_dtypes) == 2: - dtype = np.find_common_type( - [], [dtype.numpy_dtype for dtype in arg_dtypes]) - - if dtype.kind == "i": - result_dtype = NumpyType(dtype) - return PassByValueFunction( - name=name, - arg_dtypes=2*(result_dtype,), - result_dtypes=result_dtype) - - if name == "dot": - scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] - return PassByValueFunction( - name=name, - arg_dtypes=(arg_dtypes[0],)*2, - result_dtype=NumpyType(scalar_dtype)) - - if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS: - num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name] - if len(arg_dtypes) != num_args: - raise LoopyError("%s takes %d arguments (%d received)" - % (name, num_args, len(arg_dtypes))) - - dtype = np.find_common_type( - [], [dtype.numpy_dtype for dtype in arg_dtypes]) - - if dtype.kind == "c": - raise LoopyError("%s does not support complex numbers" - % name) - - result_dtype = NumpyType(dtype) - return PassByValueFunction( - name=name, - arg_dtypes=(result_dtype,)*num_args, - result_dtype=result_dtype) - - if name in VECTOR_LITERAL_FUNCS: + binary_functions = ["max", "min"] + + function_dict = {} + + for func in unary_functions: + function_dict[func] = CLMathCallable(name=func, kinds_allowed=['f'], + arity=1) + + for func in binary_functions: + function_dict[func] = CLMathCallable(name=func, kinds_allowed=['f', 'i', + 'u'], + arity=2) + + for func, num_args in _CL_SIMPLE_MULTI_ARG_FUNCTIONS.items(): + function_dict[func] = CLMathCallable(name=func, kinds_allowed=['f'], + arity=num_args) + + return function_dict + + +def collect_cl_specific_callables(kernel): + function_dict = {} + for name in VECTOR_LITERAL_FUNCS: base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name] - if count != len(arg_dtypes): - return None + arg_id_to_dtype = {} + for i in range(count): + arg_id_to_dtype[i] = NumpyType(dtype) + arg_id_to_dtype[-1] = kernel.target.vector_dtype(NumpyType(dtype), count) - return PassByValueFunction( - name="(%s%d) " % (base_tp_name, count), - arg_dtypes=(NumpyType(dtype),)*count, - result_dtypes=kernel.target.vector_dtype( - NumpyType(dtype), count)) + function_dict[name] = CLSpecificCallable(name, arg_id_to_dtype) + + return function_dict - return None # }}} @@ -357,7 +359,6 @@ class OpenCLTarget(CTarget): vec.types[base.numpy_dtype, count], target=self) - # }}} # }}} @@ -367,13 +368,11 @@ class OpenCLTarget(CTarget): class OpenCLCASTBuilder(CASTBuilder): # {{{ library - def function_manglers(self): + def callables(self, kernel): return ( - [ - opencl_function_mangler, - partial(c_math_mangler, modify_name=False) - ] + - super(OpenCLCASTBuilder, self).function_manglers()) + {**super(OpenCLCASTBuilder, self).callables(kernel), + **collect_cl_generic_callables(), + **collect_cl_specific_callables(kernel)}) def symbol_manglers(self): return ( diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 504fb30ca..18fbf45fc 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -31,7 +31,7 @@ from six.moves import range import numpy as np -from loopy.kernel.function_interface import PassByValueFunction +from loopy.kernel.function_interface import CommonReturnTypeCallable from loopy.target.opencl import OpenCLTarget, OpenCLCASTBuilder from loopy.target.python import PythonASTBuilderBase from loopy.types import NumpyType @@ -199,35 +199,34 @@ def check_sizes(kernel, device): # }}} -def pyopencl_function_mangler(target, name, arg_dtypes): - if len(arg_dtypes) == 1 and isinstance(name, str): - arg_dtype, = arg_dtypes +class PyOpenCLMathCallable(CommonReturnTypeCallable): + def get_target_specific_name(self, target): + assert isinstance(target, PyOpenCLTarget) - if arg_dtype.is_complex(): - if arg_dtype.numpy_dtype == np.complex64: - tpname = "cfloat" - elif arg_dtype.numpy_dtype == np.complex128: - tpname = "cdouble" + dtype = self.specialized_dtype + target_name = self.name + + if self.name in ["sqrt", "exp", "log", "sin", "cos", "tan", "sinh", "cosh", + "tanh", "conj"] and dtype.kind == 'c': + if dtype.numpy_dtype == np.complex64: + target_name = target_name + "cfloat" + elif dtype.numpy_dtype == np.complex128: + target_name = target_name + "cdouble" else: - raise RuntimeError("unexpected complex type '%s'" % arg_dtype) + raise RuntimeError("unexpected complex type '%s'" % dtype) + + return target_name - if name in ["sqrt", "exp", "log", - "sin", "cos", "tan", - "sinh", "cosh", "tanh", - "conj"]: - return PassByValueFunction( - name="%s_%s" % (tpname, name), - arg_dtypes=(arg_dtype,), - result_dtype=arg_dtype) - if name in ["real", "imag", "abs"]: - return PassByValueFunction( - name="%s_%s" % (tpname, name), - arg_dtypes=(arg_dtype,), - result_dtypes=NumpyType( - np.dtype(arg_dtype.numpy_dtype.type(0).real))) +def collect_pyopencl_generic_callables(kernel): + function_dict = {} + for name in ["sqrt", "exp", "log", "sin", "cos", "tan", "sinh", "cosh", "tanh", + "conj"]: + function_dict[name] = PyOpenCLMathCallable(name=name, kinds_allowed=['f', + 'c'], arity=1) - return None + # TODO: Need to add real, imag, abs for complex numbers + return function_dict # {{{ preamble generator @@ -738,13 +737,12 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): # {{{ library - def function_manglers(self): - from loopy.library.random123 import random123_function_mangler + def callables(self, kernel): + from loopy.library.random123 import random123_callables return ( - super(PyOpenCLCASTBuilder, self).function_manglers() + [ - pyopencl_function_mangler, - random123_function_mangler - ]) + {**super(PyOpenCLCASTBuilder, self).callables(kernel), + **collect_pyopencl_generic_callables(kernel), + **random123_callables(kernel)}) def preamble_generators(self): from loopy.library.random123 import random123_preamble_generator diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 533d623fd..e4334c328 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -34,8 +34,6 @@ from loopy.diagnostic import ( LoopyError, TypeInferenceFailure, DependencyTypeInferenceFailure) -from loopy.kernel.function_interface import PassByValueFunction - import logging @@ -274,17 +272,23 @@ class TypeInferenceMapper(CombineMapper): if None in arg_dtypes: return [] - mangle_result = self.kernel.mangle_function(identifier, arg_dtypes) + arg_id_to_dtype = {} + for id, dtype in enumerate(arg_dtypes): + arg_id_to_dtype[id] = dtype + + specialized_callable = self.kernel.get_specialized_callable(identifier, + arg_id_to_dtype) + if return_tuple: - if mangle_result is not None: - return [mangle_result.result_dtypes] + if specialized_callable is not None: + _, new_arg_id_to_dtype = specialized_callable + result_dtypes = (dtype for id, dtype in new_arg_id_to_dtype + if id < 0) + return [result_dtypes] else: - if mangle_result is not None: - if not isinstance(mangle_result, PassByValueFunction): - raise LoopyError("functions with more or fewer than one " - "return value may only be used in direct assignments") - - return [mangle_result.result_dtype] + if specialized_callable is not None: + _, new_arg_id_to_dtype = specialized_callable + return [new_arg_id_to_dtype[-1]] raise RuntimeError("unable to resolve " "function '%s' with %d given arguments" -- GitLab From 6393a242a29de47aa6c0b5303fdb73f02258ad19 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 10 Mar 2018 19:00:32 -0600 Subject: [PATCH 110/116] [ci skip] Flake 8 --- loopy/symbolic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 3d01b5dfb..c548c9375 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1263,8 +1263,6 @@ class FunctionToPrimitiveMapper(IdentityMapper): raise NotImplementedError("CallWithKwargs is only supported for ArrayCalls") - - # {{{ customization to pymbolic parser _open_dbl_bracket = intern("open_dbl_bracket") -- GitLab From d40135ea42c357319b9abdf1345cb2e475f17211 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Mar 2018 13:28:15 -0500 Subject: [PATCH 111/116] Started with the CallableKernel --- loopy/kernel/function_interface.py | 82 ++++++++++++++++-------------- loopy/symbolic.py | 5 +- loopy/transform/register_knl.py | 2 +- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 3ff28c4b0..454ec9d61 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -178,24 +178,20 @@ class InKernelCallable(ImmutableRecord): raise NotImplementedError() - def get_target_name(self, target): - - raise NotImplementedError() - # {{{ code generation def generate_preambles(self, target): """ This would generate the target specific preamble. """ - pass + raise NotImplementedError() - def get_target_name(self, target): - # need to figure out from the old Function Mangler - pass + def get_target_specific_name(self, target): + raise NotImplementedError() def emit_call(self, target): - pass + + raise NotImplementedError() # }}} @@ -403,19 +399,22 @@ def get_arg_id_to_keyword_from_kernel(kernel): class CallableKernel(InKernelCallable): """ - .. attribute:: subkernel - .. attribute:: arg_id_to_keyword + ..attribute:: name + + This would be the name by which the function would be called in the loopy + kernel. - .. attribute:: arg_id_dtype + .. attribute:: subkernel + + The subkernel associated with the call. """ - def __init__(self, name, subkernel): + def __init__(self, name=None, subkernel=None): - self.name = name + super(CallableKernel, self).__init__(name=name) self.subkernel = subkernel - self.arg_id_to_keyword = get_arg_id_to_keyword_from_kernel(subkernel) def copy(self, name=None, subkernel=None): if name is None: @@ -424,19 +423,9 @@ class CallableKernel(InKernelCallable): if subkernel is None: subkernel = self.subkernel - return CallableKernel(name=name, + return self.__class__(name=name, subkernel=subkernel) - def is_arg_written(self, arg_id): - """ Checks whether a given argument is written - """ - if isinstance(arg_id, int): - arg_id = self.arg_id_to_keyword[arg_id] - - assert isinstance(arg_id, str) - - return arg_id in self.subkernel.get_written_variables() - def pre_process_check(self, insn): """ Before the pre_processing of the parent kernel performs simple checks to @@ -457,23 +446,40 @@ class CallableKernel(InKernelCallable): raise LoopyError("The number of parameters do not match") def with_types(self, arg_id_to_dtype): - # can't exactly figure out the case when the kernel wont agree the - # arg_id_to_dtype in the case CallableKernel - new_args = [] - arg_dict = self.subkernel.arg_dict - arg_keyword_to_dtype = {} - for id, dtype in arg_id_to_dtype: + # {{{ sanity checks for arg_id_to_dtype:w + + for id in arg_id_to_dtype: if not isinstance(id, str): - id = self.id_to_keyword[id] - arg_keyword_to_dtype[id] = dtype + raise LoopyError("For Callable kernels the input should be all given" + "as KWargs") - for keyword, dtype in arg_keyword_to_dtype.items(): - new_args.append(arg_dict[keyword].copy(dtype=dtype)) + # }}} - new_subkernel = self.subkernel.copy(args=new_args) + new_subkernel_args = [] + + # I need to scan through all the ids and set the subkernel arguments + # accordingly. + 1/0 + + for id, dtype in arg_id_to_dtype.items(): + # this just took and intereseting turn. + # So over here I need to do the type inference based on the given inputs. + # Wow + # I think this should be possible + # first run a few tryable kind of thinds in the ad-hoc terminal + dtype_in_subkernel = self.subkernel.arg_dict[id].dtype + if dtype_in_subkernel == (auto or None): + + if not dtype == dtype_in_subkernel: + return None + + new_arg_id_to_dtype = arg_id_to_dtype.copy() + for id in self.subkernel.get_written_variables(): + if id not in arg_id_to_dtype: + new_arg_id_to_dtype[id] = self.subkerenel.arg_dict[id].dtype - return self.copy(subkernel=new_subkernel) + return self.copy() # }}} diff --git a/loopy/symbolic.py b/loopy/symbolic.py index c548c9375..c3a84fa43 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -732,7 +732,8 @@ class ArrayCall(p.CallWithKwargs): return StringifyMapper def __hash__(self): - return hash((self.function, self.parameters, self.kw_parameters)) + return hash((self.function, self.parameters + + tuple(self.kw_parameters.values()))) mapper_method = intern("map_array_call") @@ -1254,7 +1255,7 @@ class FunctionToPrimitiveMapper(IdentityMapper): # {{{ handling array calls - for par in expr.parameters + expr.kw_parameters.values(): + for par in expr.parameters + tuple(expr.kw_parameters.values()): if isinstance(par, SubArrayRef): return ArrayCall(expr.function, expr.parameters, expr.kw_parameters) diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 537970b60..8dabf64fc 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -71,7 +71,7 @@ def register_callable_kernel(parent, function_name, child): # }}} - # FIXME: needs checks whether the kernels are compliant + from loopy.kernel.function_interface import CallableKernel new_auxiliary_kernels = parent.auxiliary_kernels new_auxiliary_kernels[function_name] = child -- GitLab From 5664881fa93607be0efac93c7cca6f136483ac93 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Mar 2018 06:23:12 -0500 Subject: [PATCH 112/116] Written agr_id_to_descr --- loopy/kernel/function_interface.py | 148 ++++++++++++++++++----------- loopy/transform/register_knl.py | 1 - 2 files changed, 90 insertions(+), 59 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 454ec9d61..a18f4cd4d 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -40,17 +40,14 @@ class ValueArgDescriptor(ArgDescriptor): class ArrayArgDescriptor(ArgDescriptor): """ .. attribute:: mem_scope - .. attribute:: shape .. attribute:: dim_tags """ def __init__(self, mem_scope=None, - shape=None, dim_tags=None): super(ArgDescriptor).__init__(self, mem_scope=mem_scope, - shape=shape, dim_tags=dim_tags) def copy(self, dtype=None, mem_scope=None, shape=None, dim_tags=None): @@ -60,16 +57,11 @@ class ArrayArgDescriptor(ArgDescriptor): if mem_scope is None: mem_scope = self.mem_scope - if shape is None: - shape = self.shape - if dim_tags is None: dim_tags = self.dim_tags return ArrayArgDescriptor( - dtype=dtype, mem_scope=mem_scope, - shape=shape, dim_tags=dim_tags) @@ -110,7 +102,7 @@ class InKernelCallable(ImmutableRecord): return InKernelCallable(name=name) - def with_types(self, arg_id_to_descr): + def with_types(self, arg_id_to_dtype): """ :arg arg_id_to_type: a mapping from argument identifiers (integers for positional arguments, names for keyword @@ -130,9 +122,9 @@ class InKernelCallable(ImmutableRecord): raise NotImplementedError() - def with_shapes_and_dim_tags(self, arg_id_to_arg_descr): + def with_descrs(self, arg_id_to_descr): """ - :arg arg_id_to_arg_descr: a mapping from argument identifiers + :arg arg_id_to_descr: a mapping from argument identifiers (integers for positional arguments, names for keyword arguments) to :class:`loopy.ArrayArgDescriptor` instances. Unspecified/unknown types are not represented in *arg_id_to_descr*. @@ -142,7 +134,7 @@ class InKernelCallable(ImmutableRecord): :returns: a tuple ``(new_self, arg_id_to_type)``, where *new_self* is a new :class:`InKernelCallable` specialized for the given types, - and *arg_id_to_type* is a mapping of the same form as the + and *arg_id_to_descr* is a mapping of the same form as the argument above, however it may have more information present. Any argument information exists both by its positional and its keyword identifier. @@ -372,31 +364,6 @@ class SpecificReturnTypeCallable(InKernelCallable): # }}} -# {{{ helper function for CallableKernel - -def get_arg_id_to_keyword_from_kernel(kernel): - - arg_id_to_keyword = {} - written_args = kernel.get_written_variables() - - read_count = 0 - write_count = -1 - - for arg in kernel.args: - if arg.name in written_args: - arg_id_to_keyword[write_count] = arg.name - write_count -= 1 - else: - arg_id_to_keyword[read_count] = arg.name - read_count += 1 - - return arg_id_to_keyword - -# }}} - - -# {{{ callable kernel - class CallableKernel(InKernelCallable): """ @@ -411,11 +378,17 @@ class CallableKernel(InKernelCallable): """ + # {{{ constructor + def __init__(self, name=None, subkernel=None): super(CallableKernel, self).__init__(name=name) self.subkernel = subkernel + # }}} + + # {{{ copy + def copy(self, name=None, subkernel=None): if name is None: name = self.name @@ -426,6 +399,10 @@ class CallableKernel(InKernelCallable): return self.__class__(name=name, subkernel=subkernel) + # }}} + + # {{{ pre_process check + def pre_process_check(self, insn): """ Before the pre_processing of the parent kernel performs simple checks to @@ -444,10 +421,13 @@ class CallableKernel(InKernelCallable): if len(insn.expression.parameters) != len([a for a in self.arg_id_to_keyword if a >= 0]): raise LoopyError("The number of parameters do not match") + # }}} + + # {{{ with_types def with_types(self, arg_id_to_dtype): - # {{{ sanity checks for arg_id_to_dtype:w + # {{{ sanity checks for arg_id_to_dtype for id in arg_id_to_dtype: if not isinstance(id, str): @@ -456,32 +436,84 @@ class CallableKernel(InKernelCallable): # }}} - new_subkernel_args = [] + # Checking the input dtypes + for id, arg in self.subkernel.arg_dict.items(): + if id in self.subkernel.read_varibles(): - # I need to scan through all the ids and set the subkernel arguments - # accordingly. - 1/0 + # because we need the type of the parameters from the main kernel. It + # is necessary that we know the types from there. Hence asserting + # this condition + assert id in arg_id_to_dtype + new_arg_dict = {} for id, dtype in arg_id_to_dtype.items(): - # this just took and intereseting turn. - # So over here I need to do the type inference based on the given inputs. - # Wow - # I think this should be possible - # first run a few tryable kind of thinds in the ad-hoc terminal - dtype_in_subkernel = self.subkernel.arg_dict[id].dtype - if dtype_in_subkernel == (auto or None): - - if not dtype == dtype_in_subkernel: - return None + # Making the type of the new arg according to the arg which has been + # called in the function. + new_arg_dict[id] = self.subkernel.arg_dict[id].copy(dtype=dtype) - new_arg_id_to_dtype = arg_id_to_dtype.copy() - for id in self.subkernel.get_written_variables(): - if id not in arg_id_to_dtype: - new_arg_id_to_dtype[id] = self.subkerenel.arg_dict[id].dtype + # Merging the 2 dictionaries so that to even incorporate the variables that + # were not mentioned in arg_id_to_dtype. + new_arg_dict = {**self.subkernel.arg_dict, **new_arg_dict} - return self.copy() + # Preprocessing the kernel so that we can get the types of the other + # variables that are involved in the args + from loopy.type_inference import infer_unknown_types + pre_specialized_subkernel = self.subkernel.copy( + args=list(new_arg_dict.values)) -# }}} + # inferring the types of the written variables based on the knowledge of the + # types of the arguments supplied + specialized_kernel = infer_unknown_types(pre_specialized_subkernel, + expect_completion=True) + + new_arg_id_to_dtype = {} + for id, arg in specialized_kernel.arg_dict: + new_arg_id_to_dtype[id] = arg.dtype + + # Returning the kernel call with specialized subkernel and the corresponding + # new arg_id_to_dtype + return self.copy(subkernel=specialized_kernel), specialized_kernel.arg_dict + + # }}} + + def with_descriptors(self, arg_id_to_descr): + # Now what do I do? + # Take an argument from the other side requiring the stride of the new kernel + # to be, the number of + # is there a way to set the arguments of the kernel to be local + # I dont think there is anything which says that the argument is local and + # not global + + for id, arg_descr in arg_id_to_descr.items(): + # The dimensions don't match => reject it + if len(arg_descr.dim_tags) != len(self.subkernel.arg_dict[id].shape): + raise LoopyError("The number of dimensions do not match between the" + "caller kernel and callee kernel for the variable name %s in" + "the callee kernel" % id) + + new_args = [] + for arg in self.subkernel.args: + if arg.name in arg_id_to_descr: + new_args.copy(arg.copy(dim_tags=arg_id_to_descr[arg.name])) + pass + else: + new_args.append(arg.copy()) + + specialized_kernel = self.subkernel.copy(args=new_args) + + # I got the new subkernel by referring + new_arg_id_to_descr = 1/0 + # need to figure out a way to + + return specialized_kernel = self.copy(subkernel=specialized_kernel), + + def with_insn(self, insn): + # do the preprocess check + # match the type + # then finally match the stride/descrs + # not sure whether this function would be useful, but for the current time. + # stikcking this as an idea over here. + pass # vim: foldmethod=marker diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 8dabf64fc..d6f6116a7 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -71,7 +71,6 @@ def register_callable_kernel(parent, function_name, child): # }}} - from loopy.kernel.function_interface import CallableKernel new_auxiliary_kernels = parent.auxiliary_kernels new_auxiliary_kernels[function_name] = child -- GitLab From c037070b71db9214a54b1bd3b81aa63f05f9150d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Mar 2018 12:06:35 -0500 Subject: [PATCH 113/116] removed map-array-call from the codegen part as it was useless --- loopy/target/c/codegen/expression.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 3a467fef3..f13f66158 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -702,11 +702,6 @@ class ExpressionToCExpressionMapper(IdentityMapper): def map_local_hw_index(self, expr, type_context): raise LoopyError("plain C does not have local hw axes") - def map_array_call(self, expr, type_context): - # The call came over here, that means which means that the ArrayCall - # has been converted to an assignment. - return self.map_call(expr.func_call, type_context) - # }}} -- GitLab From d6b08a8fedac3e5fa7b98106f26bf5503ed6d788 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Mar 2018 12:11:42 -0500 Subject: [PATCH 114/116] added a bit to the other function_interface --- loopy/kernel/function_interface.py | 77 +++++++++++++----------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index a18f4cd4d..6985276a3 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -5,7 +5,6 @@ import numpy as np from pytools import ImmutableRecord from loopy.diagnostic import LoopyError -from loopy.kernel.instruction import CallInstruction from loopy.types import NumpyType @@ -383,6 +382,10 @@ class CallableKernel(InKernelCallable): def __init__(self, name=None, subkernel=None): super(CallableKernel, self).__init__(name=name) + + if not name == subkernel.name: + subkernel = subkernel.copy(name=name) + self.subkernel = subkernel # }}} @@ -401,28 +404,6 @@ class CallableKernel(InKernelCallable): # }}} - # {{{ pre_process check - - def pre_process_check(self, insn): - """ - Before the pre_processing of the parent kernel performs simple checks to - check comptatibility - """ - - # instruction should be a CallInstruction - if not isinstance(insn, CallInstruction): - raise LoopyError("The given instruction should be a CallInstruction") - - # number of assignees should match - if len(insn.assignees) != len([a for a in self.arg_id_to_keyword if a < 0]): - raise LoopyError("The number of assignees do not match") - - # number of parameters should match - if len(insn.expression.parameters) != len([a for a in - self.arg_id_to_keyword if a >= 0]): - raise LoopyError("The number of parameters do not match") - # }}} - # {{{ with_types def with_types(self, arg_id_to_dtype): @@ -476,14 +457,9 @@ class CallableKernel(InKernelCallable): # }}} - def with_descriptors(self, arg_id_to_descr): - # Now what do I do? - # Take an argument from the other side requiring the stride of the new kernel - # to be, the number of - # is there a way to set the arguments of the kernel to be local - # I dont think there is anything which says that the argument is local and - # not global + # {{{ with_descriptors + def with_descriptors(self, arg_id_to_descr): for id, arg_descr in arg_id_to_descr.items(): # The dimensions don't match => reject it if len(arg_descr.dim_tags) != len(self.subkernel.arg_dict[id].shape): @@ -500,20 +476,33 @@ class CallableKernel(InKernelCallable): new_args.append(arg.copy()) specialized_kernel = self.subkernel.copy(args=new_args) - - # I got the new subkernel by referring - new_arg_id_to_descr = 1/0 - # need to figure out a way to - - return specialized_kernel = self.copy(subkernel=specialized_kernel), - - def with_insn(self, insn): - # do the preprocess check - # match the type - # then finally match the stride/descrs - # not sure whether this function would be useful, but for the current time. - # stikcking this as an idea over here. - pass + + new_arg_id_to_descr = {} + + for id, arg in specialized_kernel.arg_dict.items(): + new_arg_id_to_descr[id] = ArrayArgDescriptor(arg.dim_tags, "GLOBAL") + + return self.copy(subkernel=specialized_kernel), new_arg_id_to_descr + + # }}} + + # {{{ get_target_specific_name + + def get_target_specific_name(self, target): + return self.subkernel.name + + # }}} + + # {{{ get preamble + + def get_preamble(self, target): + return None + + # }}} + + +def specialized__common_return_type_call(identifier, arg_id_to_dtype): + # vim: foldmethod=marker -- GitLab From a8b462fdb11f25284b08fd5a8182e0349239e7fb Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Mar 2018 15:18:57 -0500 Subject: [PATCH 115/116] Heading out to other branch. for good.. :) --- loopy/kernel/function_interface.py | 9 +++------ loopy/target/c/__init__.py | 12 +----------- loopy/target/opencl.py | 10 +++++++++- loopy/transform/register_knl.py | 13 ++++++++++++- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 6985276a3..62085a704 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -286,7 +286,6 @@ class CommonReturnTypeCallable(InKernelCallable): # }}} - # {{{ specific type callable class @@ -362,6 +361,7 @@ class SpecificReturnTypeCallable(InKernelCallable): # }}} +# {{{ callable kernel class CallableKernel(InKernelCallable): """ @@ -496,13 +496,10 @@ class CallableKernel(InKernelCallable): # {{{ get preamble def get_preamble(self, target): - return None + return "" # }}} - -def specialized__common_return_type_call(identifier, arg_id_to_dtype): - - +# }}} # vim: foldmethod=marker diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 60c2ea940..173b91a31 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -375,7 +375,7 @@ class CMathCallable(CommonReturnTypeCallable): def get_preamble(self, target): assert isinstance(target, CTarget) - return r'#include "math.h"' + return r'#include ' def collect_c_generic_callables(): @@ -809,10 +809,6 @@ class CASTBuilder(ASTBuilderBase): return block_if_necessary(assignments) def emit_call(self, codegen_state, insn): - # TODO: Maybe we want this whole thing to be stuffed down within the - # Callable class definition. - 1/0 - # haha.. can't come over here till the whole thing has been handled ecm = codegen_state.expression_to_code_mapper from pymbolic.primitives import Variable @@ -834,12 +830,6 @@ class CASTBuilder(ASTBuilderBase): if insn.is_array_call: func_id = "{func}_{insn}".format(func=func_id, insn=insn.id) - # TODO: Since dont have an interface now for the kernel. Cheating ;) by - # having my own funciton signature created at the momnent - # Note that when you do the sort of handling make sure you also have - # something to handle the keywrods obtained over here. - # Wow this is too baaad(currently).... - # Make changes so that the assigneees have been assignees negative ids. arg_id_to_dtype = {} for id, dtype in enumerate(par_dtypes): arg_id_to_dtype[id] = dtype diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 2e4b23358..f4c699464 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -169,7 +169,7 @@ VECTOR_LITERAL_FUNCS = dict( class CLMathCallable(CommonReturnTypeCallable): def get_target_specific_name(self, target): if not self.is_ready_for_codegen(): - raise LoopyError("Trying to generate ") + raise LoopyError("The function %s is not ready for codegen" % self.name) assert isinstance(target, OpenCLTarget) if self.name in ["abs", "max", "min"]: @@ -206,9 +206,16 @@ def collect_cl_generic_callables(): function_dict[func] = CLMathCallable(name=func, kinds_allowed=['f'], arity=num_args) + for name in VECTOR_LITERAL_FUNCS: + base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name] + function_dict[name] = CLMathCallable(name, kinds_allowed=[dtype.kind], + arity=count) + return function_dict +''' +Dont think this is necessary anymore def collect_cl_specific_callables(kernel): function_dict = {} for name in VECTOR_LITERAL_FUNCS: @@ -222,6 +229,7 @@ def collect_cl_specific_callables(kernel): function_dict[name] = CLSpecificCallable(name, arg_id_to_dtype) return function_dict +''' # }}} diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index d6f6116a7..874361004 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -23,6 +23,7 @@ THE SOFTWARE. """ from loopy.kernel import LoopKernel +from loopy.diagnostic import LoopyError __doc__ = """ .. currentmodule:: loopy @@ -71,10 +72,20 @@ def register_callable_kernel(parent, function_name, child): # }}} + from loopy.kernel.function_interface import CallableKernel + callable_kernel = CallableKernel(name=function_name, subkernel=child) new_auxiliary_kernels = parent.auxiliary_kernels new_auxiliary_kernels[function_name] = child - return parent.copy(auxiliary_kernels=new_auxiliary_kernels) + # somehow need to add a new element to the dictionary of the parent_knl + new_callable_dict = parent.callable_dict + if function_name in new_callable_dict: + raise LoopyError("Cant assign the name of a kernel function with a default" + "function's name.") + + new_callable_dict[function_name] = callable_kernel + + return parent.copy(callable_dict=new_callable_dict) # }}} -- GitLab From 2418abf202f96029590e1d70de00d4130e4867b8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Mar 2018 15:22:58 -0500 Subject: [PATCH 116/116] [ci skip] Heading to other branch. --- loopy/kernel/function_interface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 62085a704..833cf57a7 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -363,6 +363,7 @@ class SpecificReturnTypeCallable(InKernelCallable): # {{{ callable kernel + class CallableKernel(InKernelCallable): """ -- GitLab