From 50be51a06e4ffc12d3948f190bff6cff5c2012b2 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 8 May 2018 15:34:14 +0100 Subject: [PATCH 01/13] start working on opaque types --- loopy/codegen/__init__.py | 5 ++++- loopy/preprocess.py | 6 +++++- loopy/target/c/__init__.py | 4 +++- loopy/types.py | 16 ++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index e5938dbc4..fcd170316 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -478,9 +478,12 @@ def generate_code_v2(kernel): else: raise ValueError("argument type not understood: '%s'" % type(arg)) + from loopy.types import OpaqueType + allow_complex = False for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): - if var.dtype.involves_complex(): + dtype = var.dtype + if not isinstance(dtype, OpaqueType) and dtype.involves_complex(): allow_complex = True # }}} diff --git a/loopy/preprocess.py b/loopy/preprocess.py index c4719ace5..1d5f8c130 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -51,13 +51,17 @@ logger = logging.getLogger(__name__) def prepare_for_caching(kernel): import loopy as lp + from loopy.types import OpaqueType new_args = [] tgt = kernel.target for arg in kernel.args: dtype = arg.dtype - if dtype is not None and dtype is not lp.auto and dtype.target is not tgt: + if (dtype is not None + and not isinstance(dtype, OpaqueType) + and dtype is not lp.auto + and dtype.target is not tgt): arg = arg.copy(dtype=dtype.with_target(kernel.target)) new_args.append(arg) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 9be9db38c..366d167da 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -62,11 +62,13 @@ class DTypeRegistryWrapper(object): return self.wrapped_registry.get_or_register_dtype(names, dtype) def dtype_to_ctype(self, dtype): - from loopy.types import LoopyType, NumpyType + from loopy.types import LoopyType, 
NumpyType, OpaqueType assert isinstance(dtype, LoopyType) if isinstance(dtype, NumpyType): return self.wrapped_registry.dtype_to_ctype(dtype) + elif isinstance(dtype, OpaqueType): + return dtype.name else: raise LoopyError( "unable to convert type '%s' to C" diff --git a/loopy/types.py b/loopy/types.py index 8f0f310c3..de7890aa8 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -177,6 +177,22 @@ class AtomicNumpyType(NumpyType, AtomicType): # }}} +# {{{ + +class OpaqueType(LoopyType): + def __init__(self, name): + assert isinstance(name, str) + self.name = name + + def is_integral(self): + return False + + def is_complex(self): + return False + +# }}} + + def to_loopy_type(dtype, allow_auto=False, allow_none=False, for_atomic=False, target=None): from loopy.kernel.data import auto -- GitLab From b4498bc0c55b7add93506176c2b935e508880cb9 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 25 May 2018 11:34:34 +0100 Subject: [PATCH 02/13] const type inference --- loopy/type_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 53d7074f7..c05cdb2c1 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -314,6 +314,7 @@ class TypeInferenceMapper(CombineMapper): continue # }}} + continue raise LoopyError("Overwriting a specialized function " "is illegal--maybe start with new instance of " -- GitLab From a911a9a38694be8aa1f36ba9d0db13f7fc3ef3c7 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 7 Jun 2018 08:25:41 +0100 Subject: [PATCH 03/13] bypass argument checking for inlining --- loopy/kernel/function_interface.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 089b6cb36..b48d99001 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -518,16 +518,21 @@ class KernelInliner(SubstitutionMapper): for idx, tag in zip(outer_indices, 
callee_arg.dim_tags)) from loopy.isl_helpers import simplify_via_aff - flatten_index = simplify_via_aff(flatten_index) + try: + flatten_index = simplify_via_aff(flatten_index) + except: + pass new_indices = [] for dim_tag in caller_arg.dim_tags: ind = flatten_index // dim_tag.stride flatten_index -= (dim_tag.stride * ind) + try: + ind = simplify_via_aff(ind) + except: + pass new_indices.append(ind) - new_indices = tuple(simplify_via_aff(i) for i in new_indices) - return aggregate.index(tuple(new_indices)) else: return super(KernelInliner, self).map_subscript(expr) @@ -696,7 +701,10 @@ class CallableKernel(InKernelCallable): raise LoopyError("Descriptor must be either an instance of " "ArrayArgDescriptor or ValueArgDescriptor -- got %s." % type(descr)) - descriptor_specialized_knl = self.subkernel.copy(args=new_args) + if self.should_inline: + descriptor_specialized_knl = self.subkernel.copy() + else: + descriptor_specialized_knl = self.subkernel.copy(args=new_args) return self.copy(subkernel=descriptor_specialized_knl, arg_id_to_descr=arg_id_to_descr) @@ -900,6 +908,8 @@ class CallableKernel(InKernelCallable): new_insns.append(insn) kernel = kernel.copy(instructions=new_insns) + # TODO: resolve name clash here + kernel.scoped_functions.update(callee_knl.scoped_functions) # }}} -- GitLab From cad54af88ff40afa88edfdcee9c0cea4875c32a4 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Mon, 18 Jun 2018 18:27:06 +0100 Subject: [PATCH 04/13] rebase to kernel_callable --- loopy/check.py | 2 +- loopy/kernel/function_interface.py | 5 +---- loopy/symbolic.py | 10 +++++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 4a340e6dd..60d2fd698 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -729,7 +729,7 @@ def pre_schedule_checks(kernel): check_for_data_dependent_parallel_bounds(kernel) check_bounds(kernel) check_write_destinations(kernel) - check_has_schedulable_iname_nesting(kernel) + # 
check_has_schedulable_iname_nesting(kernel) check_variable_access_ordered(kernel) logger.debug("%s: pre-schedule check: done" % kernel.name) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index b48d99001..8363ee810 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -701,10 +701,7 @@ class CallableKernel(InKernelCallable): raise LoopyError("Descriptor must be either an instance of " "ArrayArgDescriptor or ValueArgDescriptor -- got %s." % type(descr)) - if self.should_inline: - descriptor_specialized_knl = self.subkernel.copy() - else: - descriptor_specialized_knl = self.subkernel.copy(args=new_args) + descriptor_specialized_knl = self.subkernel.copy() return self.copy(subkernel=descriptor_specialized_knl, arg_id_to_descr=arg_id_to_descr) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 09e6e5747..8800f2845 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -848,9 +848,13 @@ class SubArrayRef(p.Expression): from loopy.isl_helpers import simplify_via_aff sub_dim_tags = [] sub_shape = [] - linearized_index = simplify_via_aff( - sum(dim_tag.stride*iname for dim_tag, iname in - zip(arg.dim_tags, self.subscript.index_tuple))) + linearized_index = sum(dim_tag.stride*iname + for dim_tag, iname + in zip(arg.dim_tags, self.subscript.index_tuple)) + try: + linearized_index = simplify_via_aff(linearized_index) + except: + pass strides_as_dict = SweptInameStrideCollector(tuple(iname.name for iname in self.swept_inames))(linearized_index) -- GitLab From b06efc14202b21a93571993b593b12aacd9d2bf8 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 20 Jun 2018 19:29:06 +0100 Subject: [PATCH 05/13] try simplifying with integer variables --- loopy/kernel/function_interface.py | 6 +++--- loopy/symbolic.py | 14 ++++++++++++-- loopy/transform/register_callable.py | 2 ++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/function_interface.py 
b/loopy/kernel/function_interface.py index 8363ee810..e85a83d37 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -517,9 +517,9 @@ class KernelInliner(SubstitutionMapper): idx * tag.stride for idx, tag in zip(outer_indices, callee_arg.dim_tags)) - from loopy.isl_helpers import simplify_via_aff + from loopy.symbolic import simplify_using_aff try: - flatten_index = simplify_via_aff(flatten_index) + flatten_index = simplify_using_aff(self.caller, flatten_index) except: pass @@ -528,7 +528,7 @@ class KernelInliner(SubstitutionMapper): ind = flatten_index // dim_tag.stride flatten_index -= (dim_tag.stride * ind) try: - ind = simplify_via_aff(ind) + ind = simplify_using_aff(self.caller, ind) except: pass new_indices.append(ind) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8800f2845..47bdc4e30 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1671,7 +1671,8 @@ def guarded_pwaff_from_expr(space, expr, vars_to_zero=None): # {{{ simplify using aff def simplify_using_aff(kernel, expr): - inames = get_dependencies(expr) & kernel.all_inames() + deps = get_dependencies(expr) + inames = deps & kernel.all_inames() domain = kernel.get_inames_domain(inames) @@ -1685,7 +1686,16 @@ def simplify_using_aff(kernel, expr): except TypeError: return expr except UnknownVariableError: - return expr + integers = deps & set(t for t, v in kernel.temporary_variables.items() if np.issubdtype(v.dtype, np.integer)) + names = sorted(list(integers)) # need to sort for deterministic code generation + nd = domain.dim(isl.dim_type.set) + domain = domain.add_dims(isl.dim_type.set, len(names)) + for i, name in enumerate(names): + domain = domain.set_dim_name(isl.dim_type.set, nd + i, name) + try: + aff = aff_from_expr(domain.space, expr) + except: + return expr # FIXME: Deal with assumptions, too. 
aff = aff.gist(domain) diff --git a/loopy/transform/register_callable.py b/loopy/transform/register_callable.py index 455c2e51e..449a53f92 100644 --- a/loopy/transform/register_callable.py +++ b/loopy/transform/register_callable.py @@ -206,6 +206,8 @@ class DimChanger(IdentityMapper): self.desired_shape = desired_shape def map_subscript(self, expr): + if expr.aggregate.name not in self.callee_arg_dict: + return super(DimChanger, self).map_subscript(expr) callee_arg_dim_tags = self.callee_arg_dict[expr.aggregate.name].dim_tags flattened_index = sum(dim_tag.stride*idx for dim_tag, idx in zip(callee_arg_dim_tags, expr.index_tuple)) -- GitLab From 335fa5f69cc2cdae00c4b55b62b0695988b498fa Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 28 Jun 2018 10:39:36 +0100 Subject: [PATCH 06/13] minor changes --- loopy/symbolic.py | 4 ++-- loopy/target/c/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 47bdc4e30..6024d334d 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1686,8 +1686,8 @@ def simplify_using_aff(kernel, expr): except TypeError: return expr except UnknownVariableError: - integers = deps & set(t for t, v in kernel.temporary_variables.items() if np.issubdtype(v.dtype, np.integer)) - names = sorted(list(integers)) # need to sort for deterministic code generation + integer_vars = deps & set(t for t, v in kernel.temporary_variables.items() if np.issubdtype(v.dtype, np.integer)) + names = sorted(list(integer_vars)) # need to sort for deterministic code generation nd = domain.dim(isl.dim_type.set) domain = domain.add_dims(isl.dim_type.set, len(names)) for i, name in enumerate(names): diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 366d167da..545f8d925 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -453,7 +453,7 @@ def scope_c_math_functions(target, identifier): represented by :arg:`identifier` is known in C, otherwise returns 
*None*. """ if identifier in ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", - "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor", "max", "min"]: + "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor", "max", "min", "fmax", "fmin"]: return CMathCallable(name=identifier) return None -- GitLab From 7039a728ba4f96dd1ac0d1098d1033ae48a173a4 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 28 Jun 2018 13:51:58 +0100 Subject: [PATCH 07/13] add more C math functions --- loopy/target/c/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 545f8d925..6a8befa95 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -409,7 +409,7 @@ class CMathCallable(ScalarCallable): arg_id_to_dtype={0: NumpyType(dtype), -1: NumpyType(dtype)}) # binary functions - if name in ["fmax", "fmin"]: + if name in ["fmax", "fmin", "pow", "atan2"]: for id in arg_id_to_dtype: if not -1 <= id <= 1: @@ -428,7 +428,7 @@ class CMathCallable(ScalarCallable): if dtype.kind == "c": raise LoopyTypeError("%s does not support complex numbers") - elif dtype.kind == "f": + elif dtype.kind == "f" and name in ["fmax", "fmin"]: from loopy.target.opencl import OpenCLTarget if not isinstance(kernel.target, OpenCLTarget): if dtype == np.float64: @@ -452,8 +452,10 @@ def scope_c_math_functions(target, identifier): Returns an instance of :class:`InKernelCallable` if the function represented by :arg:`identifier` is known in C, otherwise returns *None*. 
""" - if identifier in ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", - "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor", "max", "min", "fmax", "fmin"]: + if identifier in ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", + "sinh", "pow", "atan2", "tanh", "exp", "log", "log10", + "sqrt", "ceil", "floor", "max", "min", "fmax", "fmin", + "fabs"]: return CMathCallable(name=identifier) return None -- GitLab From 88395a731c044d32a8d54da6ee8be5bd9061646b Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 28 Jun 2018 14:19:56 +0100 Subject: [PATCH 08/13] updates based on discussion on gitlab --- loopy/codegen/__init__.py | 4 +--- loopy/kernel/function_interface.py | 1 - loopy/types.py | 6 ++++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index fcd170316..830718465 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -478,12 +478,10 @@ def generate_code_v2(kernel): else: raise ValueError("argument type not understood: '%s'" % type(arg)) - from loopy.types import OpaqueType - allow_complex = False for var in kernel.args + list(six.itervalues(kernel.temporary_variables)): dtype = var.dtype - if not isinstance(dtype, OpaqueType) and dtype.involves_complex(): + if dtype.involves_complex(): allow_complex = True # }}} diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index e85a83d37..3f9a84675 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -905,7 +905,6 @@ class CallableKernel(InKernelCallable): new_insns.append(insn) kernel = kernel.copy(instructions=new_insns) - # TODO: resolve name clash here kernel.scoped_functions.update(callee_knl.scoped_functions) # }}} diff --git a/loopy/types.py b/loopy/types.py index de7890aa8..d52e029a5 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -180,9 +180,15 @@ class AtomicNumpyType(NumpyType, AtomicType): # {{{ class OpaqueType(LoopyType): + 
"""An opaque data type is truly opaque - it has no allocations, no + temporaries of that type, etc. The only thing allowed is to be passed in + through one ValueArg and go out to another. It is introduced to accommodate + functional calls to external libraries. + """ def __init__(self, name): assert isinstance(name, str) self.name = name + self.target = None def is_integral(self): return False -- GitLab From b8932a50e145ddeb6e105e38bc94a8bf899a5945 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 29 Jun 2018 19:48:37 +0100 Subject: [PATCH 09/13] minor update --- loopy/types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/types.py b/loopy/types.py index d52e029a5..59d605c85 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -196,6 +196,9 @@ class OpaqueType(LoopyType): def is_complex(self): return False + def involves_complex(self): + return False + # }}} -- GitLab From 86b76919582f9a01207af7789cfca4be9cf0bf49 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 5 Jul 2018 17:32:02 +0100 Subject: [PATCH 10/13] minor (temp) changes --- loopy/check.py | 2 +- loopy/target/c/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 60d2fd698..ab7f430ef 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -730,7 +730,7 @@ def pre_schedule_checks(kernel): check_bounds(kernel) check_write_destinations(kernel) # check_has_schedulable_iname_nesting(kernel) - check_variable_access_ordered(kernel) + # check_variable_access_ordered(kernel) logger.debug("%s: pre-schedule check: done" % kernel.name) except KeyboardInterrupt: diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 6a8befa95..681914986 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -455,7 +455,7 @@ def scope_c_math_functions(target, identifier): if identifier in ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", "pow", "atan2", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor", "max", 
"min", "fmax", "fmin", - "fabs"]: + "fabs", "tan"]: return CMathCallable(name=identifier) return None -- GitLab From 4ab87c223d888950db30e3efca9b12afa3bc552f Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 10 Jul 2018 13:06:15 +0100 Subject: [PATCH 11/13] hash builder for opaque type --- loopy/types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/types.py b/loopy/types.py index 59d605c85..0a08b8a81 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -199,6 +199,9 @@ class OpaqueType(LoopyType): def involves_complex(self): return False + def update_persistent_hash(self, key_hash, key_builder): + key_builder.rec(key_hash, self.name) + # }}} -- GitLab From 1bc7cf4a91fdf118eb062af827f80d94a94c8ada Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 17 Aug 2018 17:29:39 +0100 Subject: [PATCH 12/13] compare opaque types --- loopy/types.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/loopy/types.py b/loopy/types.py index 0a08b8a81..4e77317c1 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -202,6 +202,17 @@ class OpaqueType(LoopyType): def update_persistent_hash(self, key_hash, key_builder): key_builder.rec(key_hash, self.name) + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return ( + type(self) == type(other) + and self.name == other.name) + + def __ne__(self, other): + return not self.__eq__(other) + # }}} -- GitLab From 58ed15782da92bd25474721b07be6c460ccd8fdf Mon Sep 17 00:00:00 2001 From: tj-sun Date: Mon, 20 Aug 2018 19:53:06 +0100 Subject: [PATCH 13/13] need to look into comparisions for scoped function --- loopy/type_inference.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index c05cdb2c1..9254ecbb5 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -467,11 +467,15 @@ class TypeInferenceMapper(CombineMapper): def map_comparison(self, expr): # "bool" is unusable because OpenCL's bool has indeterminate 
memory # format. + self(expr.left, return_tuple=False, return_dtype_set=False) + self(expr.right, return_tuple=False, return_dtype_set=False) return [NumpyType(np.dtype(np.int32))] - map_logical_not = map_comparison - map_logical_and = map_comparison - map_logical_or = map_comparison + def map_logical_not(self, expr): + return [NumpyType(np.dtype(np.int32))] + + map_logical_and = map_logical_not + map_logical_or = map_logical_not def map_group_hw_index(self, expr, *args): return [self.kernel.index_dtype] -- GitLab