diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 343c850149df72f90c6860aa0b5db16fa1528718..ae18a9294cfe203dde5800b4e8eb7db1900847d3 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1897,8 +1897,12 @@ class FunctionScoper(IdentityMapper): from loopy.symbolic import Reduction + # Adding _reduce at the end of the reduction in order to avoid + # confusion between reduce(max, ...) and max(a, b) in the + # `scoped_functions` dictionary. + return Reduction( - ScopedFunction(expr.function.name), + ScopedFunction(expr.function.name+"_reduce"), tuple(new_inames), self.rec(expr.expr), allow_simultaneous=expr.allow_simultaneous) @@ -1921,7 +1925,10 @@ class ScopedFunctionCollector(CombineMapper): from loopy.kernel.function_interface import CallableOnScalar from loopy.symbolic import Reduction - callable_reduction = CallableReduction(expr.function.name) + # Refer to map_reduction subroutine of FunctionScoper. + assert expr.function.name[-7:] == "_reduce" + + callable_reduction = CallableReduction(expr.function.name[:-7]) # sanity checks @@ -1986,7 +1993,7 @@ def scope_functions(kernel): else: raise NotImplementedError("scope_functions not implemented for %s" % type(insn)) - + # Need to combine the scoped functions into a dict scoped_function_dict = dict(scoped_functions) return kernel.copy(instructions=new_insns, scoped_functions=scoped_function_dict) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 5066cff5c753089d78f3eb688f124a49ad2160c2..2fbb931cb5b37f0f1f0da3925b9c5bb95ab235f5 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -566,11 +566,14 @@ def next_indexed_name(name): class FunctionScopeChanger(IdentityMapper): - #TODO: Make it sophisticated as in I don't like the if-else systems. Needs + # TODO: Make it sophisticated as in I don't like the if-else systems. Needs # something else. + # Explain what this is doing. 
+ # Rewrites every expression that is a key of `new_names` so that it refers + # to the ScopedFunction named by its value; the rest goes to IdentityMapper. + def __init__(self, new_names): self.new_names = new_names - self.new_names_set = frozenset(new_names.values()) def map_call(self, expr): if expr in self.new_names: @@ -594,6 +597,18 @@ class FunctionScopeChanger(IdentityMapper): else: return IdentityMapper.map_call_with_kwargs(self, expr) + def map_reduction(self, expr): + from loopy.symbolic import Reduction + + if expr in self.new_names: + return Reduction( + ScopedFunction(self.new_names[expr]), + tuple(expr.inames), + self.rec(expr.expr), + allow_simultaneous=expr.allow_simultaneous) + else: + return IdentityMapper.map_reduction(self, expr) + def register_pymbolic_calls_to_knl_callables(kernel, pymbolic_calls_to_knl_callables): @@ -654,5 +669,4 @@ def register_pymbolic_calls_to_knl_callables(kernel, # }}} - # vim: foldmethod=marker diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index f4444c8864a44840f624c63ab282769e0f321a9a..f1c5607fe27c750a8ec0ab9b3b22a2eb7e664034 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -428,8 +428,8 @@ def get_argext_preamble(kernel, func_id, arg_dtypes): _REDUCTION_OPS = { "sum": SumReductionOperation, "product": ProductReductionOperation, - "maximum": MaxReductionOperation, - "minimum": MinReductionOperation, + "max": MaxReductionOperation, + "min": MinReductionOperation, "argmax": ArgMaxReductionOperation, "argmin": ArgMinReductionOperation, "segmented(sum)": SegmentedSumReductionOperation, diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 3f3c1c472a8936a70dcfdda651478f47d53bcb4b..8950f15900d43ab28c258b650330b7a6b1cc613f 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -2357,7 +2357,22 @@ class ReadyForCodegen(CombineMapper): map_function_symbol = map_constant -def try_making_callable_ready_for_codegen(kernel): +def specializing_incomplete_callables(kernel): + """ + Transformation necessary 
to type-specialize the callables which are missed + in type inference. For example consider: + ``` + knl = lp.make_kernel( + "{[i]: 0<=i<16}", + "a[i] = sin(b[i])", + [lp.GlobalArg('a', dtype=np.float64), + lp.GlobalArg('b', dtype=np.float64)]) + ``` + In this case, none of the instructions undergo type inference as the type + inference is already resolved. But this would be a problem during + code-generation as `sin` is not type specialized. + + """ from loopy.type_inference import TypeInferenceMapper from loopy.symbolic import SubstitutionRuleExpander from loopy.kernel.function_interface import ( @@ -2462,7 +2477,6 @@ def preprocess_kernel(kernel, device=None): # - realize_reduction must happen after default dependencies are added # because it manipulates the depends_on field, which could prevent # defaults from being applied. - kernel = realize_reduction(kernel, unknown_types_ok=False) # inferring the shape and dim_tags of the arguments involved in a function @@ -2470,7 +2484,7 @@ def preprocess_kernel(kernel, device=None): kernel = infer_arg_descr(kernel) # try specializing callables one last time. - kernel = try_making_callable_ready_for_codegen(kernel) + kernel = specializing_incomplete_callables(kernel) # Ordering restriction: # add_axes_to_temporaries_for_ilp because reduction accumulators diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 32670c1cc26bef1dacc18c0a46f70c92a00d9405..831bab5c21ccfccb4d65465eb6dd6335c7b938b5 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -616,7 +616,7 @@ class Reduction(p.Expression): self.allow_simultaneous = allow_simultaneous def __getinitargs__(self): - return (self.funciton, self.inames, self.expr, self.allow_simultaneous) + return (self.function, self.inames, self.expr, self.allow_simultaneous) def get_hash(self): return hash((self.__class__, self.function, self.inames, self.expr))